summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2009-04-18 00:02:25 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2009-04-18 00:02:25 +0000
commitbeb6a9a183c1ca25c99e4401b58266ce73b8c846 (patch)
treeb02a2cfe46761e177303c1dbaf420f7cfb14642f /src
parentedd03a831f350bc72d76d4fad2b390d43faccb79 (diff)
downloadgperftools-beb6a9a183c1ca25c99e4401b58266ce73b8c846.tar.gz
Fri Apr 17 16:40:48 2009 Google Inc. <opensource@google.com>
* google-perftools: version 1.2 release * Allow large_alloc_threshold=0 to turn it off entirely (csilvers) * Die more helpfully when out of memory for internal data (csilvers) * Refactor profile-data gathering, add a new unittest (cgd, nabeelmian) * BUGFIX: fix rounding errors with static thread-size caches (addi) * BUGFIX: disable hooks better when forking in leak-checker (csilvers) * BUGFIX: fix realloc of crt pointers on windows (csilvers) * BUGFIX: do a better job of finding binaries in .sh tests (csilvers) * WINDOWS: allow overriding malloc/etc instead of patching (mbelshe) * PORTING: fix compilation error in a ppc-specific file (csilvers) * PORTING: deal with quirks in cygwin's /proc/self/maps (csilvers) * PORTING: use 'A' version of functions for ascii input (mbelshe) * PORTING: generate .so's on cygwin and mingw (ajenjo) * PORTING: disable profiler methods on cygwin (jperkins) * Updated autoconf version to 2.61 and libtool version to 1.5.26 git-svn-id: http://gperftools.googlecode.com/svn/trunk@68 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
-rw-r--r--src/base/atomicops-internals-linuxppc.h5
-rw-r--r--src/base/logging.cc10
-rw-r--r--src/base/logging.h4
-rw-r--r--src/base/simple_mutex.h13
-rw-r--r--src/base/sysinfo.cc44
-rw-r--r--src/base/sysinfo.h4
-rw-r--r--src/config.h.in6
-rw-r--r--src/google/heap-checker.h60
-rw-r--r--src/google/profiler.h4
-rw-r--r--src/heap-checker.cc82
-rw-r--r--src/heap-profile-table.cc8
-rw-r--r--src/heap-profile-table.h5
-rw-r--r--src/page_heap_allocator.h6
-rwxr-xr-xsrc/pprof99
-rw-r--r--src/profile-handler.cc498
-rw-r--r--src/profile-handler.h147
-rw-r--r--src/profiledata.cc14
-rw-r--r--src/profiledata.h14
-rw-r--r--src/profiler.cc400
-rwxr-xr-xsrc/sampler.cc36
-rw-r--r--src/stacktrace_win32-inl.h2
-rw-r--r--src/stacktrace_x86-inl.h2
-rw-r--r--src/tcmalloc.cc47
-rw-r--r--src/tests/frag_unittest.cc25
-rw-r--r--src/tests/profile-handler_unittest.cc445
-rw-r--r--src/tests/profiledata_unittest.cc298
-rw-r--r--src/tests/sampling_test.cc2
-rwxr-xr-xsrc/tests/sampling_test.sh18
-rw-r--r--src/tests/testutil.cc2
-rw-r--r--src/thread_cache.cc17
-rw-r--r--src/windows/addr2line-pdb.c1
-rw-r--r--src/windows/config.h16
-rw-r--r--src/windows/mingw.h2
-rw-r--r--src/windows/nm-pdb.c1
-rw-r--r--src/windows/override_functions.cc118
-rw-r--r--src/windows/patch_functions.cc38
-rw-r--r--src/windows/port.cc3
-rw-r--r--src/windows/port.h2
38 files changed, 1931 insertions, 567 deletions
diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h
index dcf143e..d8e23fe 100644
--- a/src/base/atomicops-internals-linuxppc.h
+++ b/src/base/atomicops-internals-linuxppc.h
@@ -407,9 +407,4 @@ inline Atomic64 Release_Load(volatile const Atomic64 *ptr) {
} // namespace base::subtle
} // namespace base
-// NOTE(vchen): The following is also deprecated. New callers should use
-// the base::subtle namespace.
-inline void MemoryBarrier() {
- base::subtle::MemoryBarrier();
-}
#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_
diff --git a/src/base/logging.cc b/src/base/logging.cc
index 2f56fce..a68401c 100644
--- a/src/base/logging.cc
+++ b/src/base/logging.cc
@@ -39,7 +39,7 @@ DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0),
"--verbose == -4 means we log fatal errors only.");
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
// While windows does have a POSIX-compatible API
// (_open/_write/_close), it acquires memory. Using this lower-level
@@ -49,8 +49,8 @@ RawFD RawOpenForWriting(const char* filename) {
// that ever becomes a problem then we ought to compute the absolute
// path on its behalf (perhaps the ntdll/kernel function isn't aware
// of the working directory?)
- RawFD fd = CreateFile(filename, GENERIC_WRITE, 0, NULL,
- CREATE_ALWAYS, 0, NULL);
+ RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL,
+ CREATE_ALWAYS, 0, NULL);
if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS)
SetEndOfFile(fd); // truncate the existing file
return fd;
@@ -71,7 +71,7 @@ void RawClose(RawFD handle) {
CloseHandle(handle);
}
-#else // _WIN32
+#else // _WIN32 || __CYGWIN__ || __CYGWIN32__
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
@@ -104,4 +104,4 @@ void RawClose(RawFD fd) {
NO_INTR(close(fd));
}
-#endif // _WIN32
+#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__
diff --git a/src/base/logging.h b/src/base/logging.h
index 77ee988..bc1a4c2 100644
--- a/src/base/logging.h
+++ b/src/base/logging.h
@@ -208,14 +208,14 @@ inline void LOG_IF(int lvl, bool cond, const char* pat, ...) {
// to allow even more low-level stuff in the future.
// Like other "raw" routines, these functions are best effort, and
// thus don't return error codes (except RawOpenForWriting()).
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
#include <windows.h>
typedef HANDLE RawFD;
const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE;
#else
typedef int RawFD;
const RawFD kIllegalRawFD = -1; // what open returns if it fails
-#endif // _WIN32
+#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
RawFD RawOpenForWriting(const char* filename); // uses default permissions
void RawWrite(RawFD fd, const char* buf, size_t len);
diff --git a/src/base/simple_mutex.h b/src/base/simple_mutex.h
index d59f5a0..0eed34f 100644
--- a/src/base/simple_mutex.h
+++ b/src/base/simple_mutex.h
@@ -95,8 +95,10 @@
#if defined(NO_THREADS)
typedef int MutexType; // to keep a lock-count
-#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
-# define WIN32_LEAN_AND_MEAN // We only need minimal includes
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN // We only need minimal includes
+# endif
// We need Windows NT or later for TryEnterCriticalSection(). If you
// don't need that functionality, you can remove these _WIN32_WINNT
// lines, and change TryLock() to assert(0) or something.
@@ -152,7 +154,7 @@ class Mutex {
inline void SetIsSafe() { is_safe_ = true; }
// Catch the error of writing Mutex when intending MutexLock.
- Mutex(Mutex *ignored) {}
+ Mutex(Mutex* /*ignored*/) {}
// Disallow "evil" constructors
Mutex(const Mutex&);
void operator=(const Mutex&);
@@ -180,7 +182,7 @@ bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; }
void Mutex::ReaderLock() { assert(++mutex_ > 0); }
void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
-#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
+#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__)
Mutex::Mutex() { InitializeCriticalSection(&mutex_); SetIsSafe(); }
Mutex::~Mutex() { DeleteCriticalSection(&mutex_); }
@@ -206,7 +208,8 @@ Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy); }
void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); }
void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
bool Mutex::TryLock() { return is_safe_ ?
- pthread_rwlock_trywrlock(&mutex_) == 0 : true; }
+ pthread_rwlock_trywrlock(&mutex_) == 0 :
+ true; }
void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); }
void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); }
#undef SAFE_PTHREAD
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index 1f542ae..a6bd3a0 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -31,6 +31,10 @@
// Author: Mike Burrows
#include "config.h"
+#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
+# define OS_WINDOWS 1
+#endif
+
#include <stdlib.h> // for getenv()
#include <stdio.h> // for snprintf(), sscanf()
#include <string.h> // for memmove(), memchr(), etc.
@@ -48,7 +52,7 @@
#include <sys/sysctl.h>
#elif defined __sun__ // Solaris
#include <procfs.h> // for, e.g., prmap_t
-#elif defined(_WIN32) || defined(__MINGW32__)
+#elif defined(OS_WINDOWS)
#include <process.h> // for getpid() (actually, _getpid())
#include <shlwapi.h> // for SHGetValueA()
#include <tlhelp32.h> // for Module32First()
@@ -58,7 +62,7 @@
#include "base/logging.h"
#include "base/cycleclock.h"
-#ifdef _WIN32
+#ifdef OS_WINDOWS
#ifdef MODULEENTRY32
// In a change from the usual W-A pattern, there is no A variant of
// MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A.
@@ -75,7 +79,7 @@
#ifndef TH32CS_SNAPMODULE32
#define TH32CS_SNAPMODULE32 0
#endif /* TH32CS_SNAPMODULE32 */
-#endif /* _WIN32 */
+#endif /* OS_WINDOWS */
// Re-run fn until it doesn't cause EINTR.
#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
@@ -177,7 +181,7 @@ static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous
static int cpuinfo_num_cpus = 1; // Conservative guess
static void SleepForMilliseconds(int milliseconds) {
-#ifdef _WIN32
+#ifdef OS_WINDOWS
_sleep(milliseconds); // Windows's _sleep takes milliseconds argument
#else
// Sleep for a few milliseconds
@@ -334,7 +338,7 @@ static void InitializeSystemInfo() {
}
// TODO(csilvers): also figure out cpuinfo_num_cpus
-#elif defined(_WIN32) || defined(__MINGW32__)
+#elif defined(OS_WINDOWS)
# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
@@ -410,7 +414,7 @@ bool HasPosixThreads() {
if (confstr(_CS_GNU_LIBPTHREAD_VERSION, buf, sizeof(buf)) == 0)
return false;
return strncmp(buf, "NPTL", 4) == 0;
-#elif defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+#elif defined(OS_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__)
return false;
#else // other OS
return true; // Assume that everything else has Posix
@@ -492,7 +496,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
#elif defined(__MACH__)
current_image_ = _dyld_image_count(); // count down from the top
current_load_cmd_ = -1;
-#elif defined(_WIN32) || defined(__MINGW32__)
+#elif defined(OS_WINDOWS)
snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE |
TH32CS_SNAPMODULE32,
GetCurrentProcessId());
@@ -504,7 +508,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer,
}
ProcMapsIterator::~ProcMapsIterator() {
-#if defined(_WIN32) || defined(__MINGW32__)
+#if defined(OS_WINDOWS)
if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_);
#elif defined(__MACH__)
// no cleanup necessary!
@@ -515,7 +519,7 @@ ProcMapsIterator::~ProcMapsIterator() {
}
bool ProcMapsIterator::Valid() const {
-#if defined(_WIN32) || defined(__MINGW32__)
+#if defined(OS_WINDOWS)
return snapshot_ != INVALID_HANDLE_VALUE;
#elif defined(__MACH__)
return 1;
@@ -579,7 +583,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
int64 tmpinode;
int major, minor;
unsigned filename_offset = 0;
-#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
+#if defined(__linux__)
// for now, assume all linuxes have the same format
if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
start ? start : &tmpstart,
@@ -588,6 +592,24 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
offset ? offset : &tmpoffset,
&major, &minor,
inode ? inode : &tmpinode, &filename_offset) != 7) continue;
+#elif defined(__CYGWIN__) || defined(__CYGWIN32__)
+ // cygwin is like linux, except the third field is the "entry point"
+ // rather than the offset (see format_process_maps at
+ // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src
+ // Offset is always be 0 on cygwin: cygwin implements an mmap
+ // by loading the whole file and then calling NtMapViewOfSection.
+ // Cygwin also seems to set its flags kinda randomly; use windows default.
+ char tmpflags[5];
+ if (offset)
+ *offset = 0;
+ strcpy(flags_, "r-xp");
+ if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
+ start ? start : &tmpstart,
+ end ? end : &tmpend,
+ tmpflags,
+ &tmpoffset,
+ &major, &minor,
+ inode ? inode : &tmpinode, &filename_offset) != 7) continue;
#elif defined(__FreeBSD__)
// For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup
tmpstart = tmpend = tmpoffset = 0;
@@ -722,7 +744,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags,
// If we get here, no more load_cmd's in this image talk about
// segments. Go on to the next image.
}
-#elif defined(_WIN32) || defined(__MINGW32__)
+#elif defined(OS_WINDOWS)
static char kDefaultPerms[5] = "r-xp";
BOOL ok;
if (module_.dwSize == 0) { // only possible before first call
diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h
index 86d998c..fb276eb 100644
--- a/src/base/sysinfo.h
+++ b/src/base/sysinfo.h
@@ -39,7 +39,7 @@
#include "config.h"
#include <time.h>
-#if defined(_WIN32) || defined(__MINGW32__)
+#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
#include <windows.h> // for DWORD
#include <TlHelp32.h> // for CreateToolhelp32Snapshot
#endif
@@ -190,7 +190,7 @@ class ProcMapsIterator {
char *etext_; // end of text
char *nextline_; // start of next line
char *ebuf_; // end of buffer (1 char for a nul)
-#if defined(_WIN32) || defined(__MINGW32__)
+#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
HANDLE snapshot_; // filehandle on dll info
// In a change from the usual W-A pattern, there is no A variant of
// MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A.
diff --git a/src/config.h.in b/src/config.h.in
index d225d49..bfac21c 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -150,9 +150,6 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H
-/* Define to 1 if you have the <windows.h> header file. */
-#undef HAVE_WINDOWS_H
-
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__
@@ -165,6 +162,9 @@
/* Define to 1 if int32_t is equivalent to intptr_t */
#undef INT32_EQUALS_INTPTR
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+
/* Name of package */
#undef PACKAGE
diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h
index 89a2512..acedd46 100644
--- a/src/google/heap-checker.h
+++ b/src/google/heap-checker.h
@@ -120,41 +120,22 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
// has been called at least once).
~HeapLeakChecker();
- // Return true iff the heap does not have more objects allocated
- // w.r.t. its state at the time of our construction.
- // This does full pprof heap change checking and reporting.
- // To detect tricky leaks it depends on correct working pprof implementation
- // referred by FLAGS_heap_profile_pprof.
- // (By 'tricky leaks' we mean a change of heap state that e.g. for SameHeap
- // preserves the number of allocated objects and bytes
- // -- see TestHeapLeakCheckerTrick in heap-checker_unittest.cc --
- // and thus is not detected by BriefNoLeaks.)
- // CAVEAT: pprof will do no checking over stripped binaries
- // (our automatic test binaries are stripped)
- // NOTE: All *NoLeaks() and *SameHeap() methods can be called many times
- // to check for leaks at different end-points in program's execution.
- bool NoLeaks() { return DoNoLeaks(NO_LEAKS, USE_PPROF, PPROF_REPORT); }
-
- // Return true iff the heap does not seem to have more objects allocated
- // w.r.t. its state at the time of our construction
- // by looking at the number of objects & bytes allocated.
- // This also tries to do pprof reporting of detected leaks.
- bool QuickNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, PPROF_REPORT); }
-
- // Return true iff the heap does not seem to have more objects allocated
- // w.r.t. its state at the time of our construction
- // by looking at the number of objects & bytes allocated.
- // This does not try to use pprof at all.
- bool BriefNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, NO_REPORT); }
-
- // These are similar to their *NoLeaks counterparts,
- // but they in addition require no negative leaks,
- // i.e. the state of the heap must be exactly the same
- // as at the time of our construction.
- bool SameHeap() { return DoNoLeaks(SAME_HEAP, USE_PPROF, PPROF_REPORT); }
- bool QuickSameHeap()
- { return DoNoLeaks(SAME_HEAP, USE_COUNTS, PPROF_REPORT); }
- bool BriefSameHeap() { return DoNoLeaks(SAME_HEAP, USE_COUNTS, NO_REPORT); }
+ // These used to be different but are all the same now: they return
+ // true iff all memory allocated since this HeapLeakChecker object
+ // was constructor is still reachable from global state.
+ //
+ // Because we fork to convert addresses to symbol-names, and forking
+ // is not thread-safe, and we may be called in a threaded context,
+ // we do not try to symbolize addresses when called manually.
+ bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
+
+ // These forms are obsolete; use NoLeaks() instead.
+ // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED.
+ bool QuickNoLeaks() { return NoLeaks(); }
+ bool BriefNoLeaks() { return NoLeaks(); }
+ bool SameHeap() { return NoLeaks(); }
+ bool QuickSameHeap() { return NoLeaks(); }
+ bool BriefSameHeap() { return NoLeaks(); }
// Detailed information about the number of leaked bytes and objects
// (both of these can be negative as well).
@@ -231,15 +212,10 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker {
// Helper for constructors
void Create(const char *name, bool make_start_snapshot);
- // Types for DoNoLeaks and its helpers.
- enum CheckType { SAME_HEAP, NO_LEAKS };
- enum CheckFullness { USE_PPROF, USE_COUNTS };
- enum ReportMode { PPROF_REPORT, NO_REPORT };
+ enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE };
// Helper for *NoLeaks and *SameHeap
- bool DoNoLeaks(CheckType check_type,
- CheckFullness fullness,
- ReportMode report_mode);
+ bool DoNoLeaks(ShouldSymbolize should_symbolize);
// These used to be public, but they are now deprecated.
// Will remove entirely when all internal uses are fixed.
diff --git a/src/google/profiler.h b/src/google/profiler.h
index be7dbf3..74b936f 100644
--- a/src/google/profiler.h
+++ b/src/google/profiler.h
@@ -146,9 +146,7 @@ PERFTOOLS_DLL_DECL void ProfilerDisable();
/* Returns nonzero if profile is currently enabled, zero if it's not. */
PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads();
-/* Routine for registering new threads with the profiler. This routine
- * is called by the Thread module in google3/thread whenever a new
- * thread is created.
+/* Routine for registering new threads with the profiler.
*/
PERFTOOLS_DLL_DECL void ProfilerRegisterThread();
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index ef37df2..4c446c1 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -124,9 +124,7 @@ DEFINE_string(heap_check,
" or the empty string are the supported choices. "
"(See HeapLeakChecker::InternalInitStart for details.)");
-DEFINE_bool(heap_check_report,
- EnvToBool("HEAP_CHECK_REPORT", true),
- "If overall heap check should report the found leaks via pprof");
+DEFINE_bool(heap_check_report, true, "Obsolete");
DEFINE_bool(heap_check_before_constructors,
true,
@@ -137,13 +135,7 @@ DEFINE_bool(heap_check_after_destructors,
"If overall heap check is to end after global destructors "
"or right after all REGISTER_HEAPCHECK_CLEANUP's");
-DEFINE_bool(heap_check_strict_check,
- EnvToBool("HEAP_CHECK_STRICT_CHECK", true),
- "If overall heap check is to be done "
- "via HeapLeakChecker::*SameHeap "
- "or HeapLeakChecker::*NoLeaks call");
- // heap_check_strict_check == false
- // is useful only when heap_check_before_constructors == false
+DEFINE_bool(heap_check_strict_check, true, "Obsolete");
DEFINE_bool(heap_check_ignore_global_live,
EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true),
@@ -264,6 +256,9 @@ static const int heap_checker_info_level = 0;
// The larger it can be, the lesser is the chance of missing real leaks.
static const size_t kPointerSourceAlignment = sizeof(void*);
+// Cancel our InitialMallocHook_* if present.
+static void CancelInitialMallocHooks(); // defined below
+
//----------------------------------------------------------------------
// HeapLeakChecker's own memory allocator that is
// independent of the normal program allocator.
@@ -573,11 +568,13 @@ enum StackDirection {
// Determine which way the stack grows:
-static StackDirection ATTRIBUTE_NOINLINE GetStackDirection() {
- if (__builtin_frame_address(0) > __builtin_frame_address(1))
- return GROWS_TOWARDS_HIGH_ADDRESSES;
- if (__builtin_frame_address(0) < __builtin_frame_address(1))
+static StackDirection ATTRIBUTE_NOINLINE GetStackDirection(
+ const uintptr_t *const ptr) {
+ uintptr_t x;
+ if (&x < ptr)
return GROWS_TOWARDS_LOW_ADDRESSES;
+ if (ptr < &x)
+ return GROWS_TOWARDS_HIGH_ADDRESSES;
RAW_CHECK(0, ""); // Couldn't determine the stack direction.
@@ -597,7 +594,7 @@ static void RegisterStackLocked(const void* top_ptr) {
// make sure stack_direction is initialized
if (stack_direction == UNKNOWN_DIRECTION) {
- stack_direction = GetStackDirection();
+ stack_direction = GetStackDirection(&top);
}
// Find memory region with this stack
@@ -1454,7 +1451,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) {
//----------------------------------------------------------------------
char* HeapLeakChecker::MakeProfileNameLocked() {
- RAW_DCHECK(lock_.IsHeld(), "");
+ RAW_DCHECK(lock_->IsHeld(), "");
RAW_DCHECK(heap_checker_lock.IsHeld(), "");
const int len = profile_name_prefix->size() + strlen(name_) + 5 +
strlen(HeapProfileTable::kFileExt) + 1;
@@ -1596,14 +1593,23 @@ static void SuggestPprofCommand(const char* pprof_file_arg) {
);
}
-bool HeapLeakChecker::DoNoLeaks(CheckType check_type,
- CheckFullness fullness,
- ReportMode report_mode) {
+bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
SpinLockHolder l(lock_);
// The locking also helps us keep the messages
// for the two checks close together.
SpinLockHolder al(&alignment_checker_lock);
+ // thread-safe: protected by alignment_checker_lock
+ static bool have_disabled_hooks_for_symbolize = false;
+ // Once we've checked for leaks and symbolized the results once, it's
+ // not safe to do it again. This is because in order to symbolize
+ // safely, we had to disable all the malloc hooks here, so we no
+ // longer can be confident we've collected all the data we need.
+ if (have_disabled_hooks_for_symbolize) {
+ RAW_LOG(FATAL, "Must not call heap leak checker manually after "
+ " program-exit's automatic check.");
+ }
+
HeapProfileTable::Snapshot* leaks = NULL;
char* pprof_file = NULL;
@@ -1709,7 +1715,20 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type,
int64(stats.allocs - stats.frees),
int64(stats.alloc_size - stats.free_size));
} else {
- leaks->ReportLeaks(name_, pprof_file);
+ if (should_symbolize == SYMBOLIZE) {
+ // To turn addresses into symbols, we need to fork, which is a
+ // problem if both parent and child end up trying to call the
+ // same malloc-hooks we've set up, at the same time. To avoid
+ // trouble, we turn off the hooks before symbolizing. Note that
+ // this makes it unsafe to ever leak-report again! Luckily, we
+ // typically only want to report once in a program's run, at the
+ // very end.
+ CancelInitialMallocHooks();
+ have_disabled_hooks_for_symbolize = true;
+ leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize
+ } else {
+ leaks->ReportLeaks(name_, pprof_file, false);
+ }
if (FLAGS_heap_check_identify_leaks) {
leaks->ReportIndividualObjects();
}
@@ -1854,7 +1873,6 @@ static bool internal_init_start_has_run = false;
// (ignore more)
FLAGS_heap_check_after_destructors = false; // to after cleanup
// (most data is live)
- FLAGS_heap_check_strict_check = false; // < profile check (ignore more)
FLAGS_heap_check_ignore_thread_live = true; // ignore all live
FLAGS_heap_check_ignore_global_live = true; // ignore all live
} else if (FLAGS_heap_check == "normal") {
@@ -1862,7 +1880,6 @@ static bool internal_init_start_has_run = false;
FLAGS_heap_check_before_constructors = true; // from no profile (fast)
FLAGS_heap_check_after_destructors = false; // to after cleanup
// (most data is live)
- FLAGS_heap_check_strict_check = true; // == profile check (fast)
FLAGS_heap_check_ignore_thread_live = true; // ignore all live
FLAGS_heap_check_ignore_global_live = true; // ignore all live
} else if (FLAGS_heap_check == "strict") {
@@ -1871,7 +1888,6 @@ static bool internal_init_start_has_run = false;
FLAGS_heap_check_before_constructors = true; // from no profile (fast)
FLAGS_heap_check_after_destructors = true; // to after destructors
// (less data live)
- FLAGS_heap_check_strict_check = true; // == profile check (fast)
FLAGS_heap_check_ignore_thread_live = true; // ignore all live
FLAGS_heap_check_ignore_global_live = true; // ignore all live
} else if (FLAGS_heap_check == "draconian") {
@@ -1879,7 +1895,6 @@ static bool internal_init_start_has_run = false;
FLAGS_heap_check_before_constructors = true; // from no profile (fast)
FLAGS_heap_check_after_destructors = true; // to after destructors
// (need them)
- FLAGS_heap_check_strict_check = true; // == profile check (fast)
FLAGS_heap_check_ignore_thread_live = false; // no live flood (stricter)
FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter)
} else if (FLAGS_heap_check == "as-is") {
@@ -1983,6 +1998,7 @@ bool HeapLeakChecker::DoMainHeapCheck() {
RAW_DCHECK(heap_checker_pid == getpid(), "");
do_main_heap_check = false; // will do it now; no need to do it more
}
+
if (!NoGlobalLeaks()) {
if (FLAGS_heap_check_identify_leaks) {
RAW_LOG(FATAL, "Whole-program memory leaks found.");
@@ -2005,15 +2021,14 @@ bool HeapLeakChecker::NoGlobalLeaks() {
// we never delete or change main_heap_checker once it's set:
HeapLeakChecker* main_hc = GlobalChecker();
if (main_hc) {
- CheckType check_type = FLAGS_heap_check_strict_check ? SAME_HEAP : NO_LEAKS;
- if (FLAGS_heap_check_before_constructors) check_type = SAME_HEAP;
- // NO_LEAKS here just would make it slower in this case
- // (we don't use the starting profile anyway)
- CheckFullness fullness = check_type == NO_LEAKS ? USE_PPROF : USE_COUNTS;
- // use pprof if it can help ignore false leaks
- ReportMode report_mode = FLAGS_heap_check_report ? PPROF_REPORT : NO_REPORT;
RAW_VLOG(1, "Checking for whole-program memory leaks");
- return main_hc->DoNoLeaks(check_type, fullness, report_mode);
+ // The program is over, so it's safe to symbolize addresses (which
+ // requires a fork) because no serious work is expected to be done
+ // after this. Symbolizing is really useful -- knowing what
+ // function has a leak is better than knowing just an address --
+ // and while we can only safely symbolize once in a program run,
+ // now is the time (after all, there's no "later" that would be better).
+ return main_hc->DoNoLeaks(SYMBOLIZE);
}
return true;
}
@@ -2034,9 +2049,6 @@ void HeapLeakChecker::CancelGlobalCheck() {
static bool in_initial_malloc_hook = false;
-// Cancel our InitialMallocHook_* if present.
-static void CancelInitialMallocHooks(); // defined below
-
#ifdef HAVE___ATTRIBUTE__ // we need __attribute__((weak)) for this to work
#define INSTALLED_INITIAL_MALLOC_HOOKS
diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc
index aaa4a2f..4d0ad8b 100644
--- a/src/heap-profile-table.cc
+++ b/src/heap-profile-table.cc
@@ -306,7 +306,7 @@ int HeapProfileTable::UnparseBucket(const Bucket& b,
return buflen;
}
-HeapProfileTable::Bucket**
+HeapProfileTable::Bucket**
HeapProfileTable::MakeSortedBucketList() const {
Bucket** list =
reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
@@ -602,7 +602,8 @@ static bool Symbolize(void *pc, char *out, int out_size) {
}
void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name,
- const char* filename) {
+ const char* filename,
+ bool should_symbolize) {
// This is only used by the heap leak checker, but is intimately
// tied to the allocation map that belongs in this module and is
// therefore placed here.
@@ -644,7 +645,8 @@ void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name,
for (int j = 0; j < e.bucket->depth; j++) {
const void* pc = e.bucket->stack[j];
const char* sym;
- if (Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) {
+ if (should_symbolize &&
+ Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) {
sym = sym_buffer;
} else {
sym = "";
diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h
index acbe14b..92d237e 100644
--- a/src/heap-profile-table.h
+++ b/src/heap-profile-table.h
@@ -335,9 +335,12 @@ class HeapProfileTable::Snapshot {
// Report anything in this snapshot as a leak.
// May use new/delete for temporary storage.
+ // If should_symbolize is true, will fork (which is not threadsafe)
+ // to turn addresses into symbol names. Set to false for maximum safety.
// Also writes a heap profile to "filename" that contains
// all of the objects in this snapshot.
- void ReportLeaks(const char* checker_name, const char* filename);
+ void ReportLeaks(const char* checker_name, const char* filename,
+ bool should_symbolize);
// Report the addresses of all leaked objects.
// May use new/delete for temporary storage.
diff --git a/src/page_heap_allocator.h b/src/page_heap_allocator.h
index 1911bc5..20e1ab1 100644
--- a/src/page_heap_allocator.h
+++ b/src/page_heap_allocator.h
@@ -63,7 +63,11 @@ class PageHeapAllocator {
if (free_avail_ < kAlignedSize) {
// Need more room
free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
- CHECK_CONDITION(free_area_ != NULL);
+ if (free_area_ == NULL) {
+ CRASH("FATAL ERROR: Out of memory trying to allocate internal "
+ "tcmalloc data (%d bytes, object-size %d)\n",
+ kAllocIncrement, static_cast<int>(sizeof(T)));
+ }
free_avail_ = kAllocIncrement;
}
result = free_area_;
diff --git a/src/pprof b/src/pprof
index e3d0907..f23786d 100755
--- a/src/pprof
+++ b/src/pprof
@@ -72,7 +72,7 @@ use strict;
use warnings;
use Getopt::Long;
-my $PPROF_VERSION = "1.1";
+my $PPROF_VERSION = "1.2";
# These are the object tools we use which can come from a
# user-specified location using --tools, from the PPROF_TOOLS
@@ -649,6 +649,7 @@ sub InteractiveMode {
while (1) {
print "(pprof) ";
$_ = <STDIN>;
+ last if ! defined $_ ;
s/\r//g; # turn windows-looking lines into unix-looking lines
# Save some flags that might be reset by InteractiveCommand()
@@ -919,7 +920,7 @@ sub PrintCallgrind {
map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
[$_, $1, $2] }
keys %$calls ) {
- my $count = $calls->{$call};
+ my $count = int($calls->{$call});
$call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
my ( $caller_file, $caller_line, $caller_function,
$callee_file, $callee_line, $callee_function ) =
@@ -1714,6 +1715,22 @@ sub IsSecondPcAlwaysTheSame {
return $second_pc;
}
+sub ExtractSymbolLocation {
+ my $symbols = shift;
+ my $address = shift;
+ # 'addr2line' outputs "??:0" for unknown locations; we do the
+ # same to be consistent.
+ my $location = "??:0:unknown";
+ if (exists $symbols->{$address}) {
+ my $file = $symbols->{$address}->[1];
+ if ($file eq "?") {
+ $file = "??:0"
+ }
+ $location = $file . ":" . $symbols->{$address}->[0];
+ }
+ return $location;
+}
+
# Extracts a graph of calls.
sub ExtractCalls {
my $symbols = shift;
@@ -1722,20 +1739,13 @@ sub ExtractCalls {
my $calls = {};
while( my ($stack_trace, $count) = each %$profile ) {
my @address = split(/\n/, $stack_trace);
+ my $destination = ExtractSymbolLocation($symbols, $address[0]);
+ AddEntry($calls, $destination, $count);
for (my $i = 1; $i <= $#address; $i++) {
- # TODO(csilvers): what should we do if $addresses[$i-1] doesn't exist?
- if (exists $symbols->{$address[$i]}) {
- my $source = $symbols->{$address[$i]}->[1] . ":" .
- $symbols->{$address[$i]}->[0];
- my $destination = $symbols->{$address[$i-1]}->[1] . ":" .
- $symbols->{$address[$i-1]}->[0];
- my $call = "$source -> $destination";
- AddEntry($calls, $call, $count);
-
- if ($i == 1) {
- AddEntry($calls, $destination, $count);
- }
- }
+ my $source = ExtractSymbolLocation($symbols, $address[$i]);
+ my $call = "$source -> $destination";
+ AddEntry($calls, $call, $count);
+ $destination = $source;
}
}
@@ -2938,7 +2948,7 @@ sub ParseLibraries {
my $finish;
my $offset;
my $lib;
- if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|exe)(\.\d+)*\w*)/i) {
+ if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib)(\.\d+)*\w*)/i) {
# Full line from /proc/self/maps. Example:
# 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so
$start = HexExtend($1);
@@ -3250,6 +3260,7 @@ sub GetLineNumbersViaAddr2Line {
$count++;
}
close(SYMBOLS);
+ return $count;
}
sub GetLineNumbers {
@@ -3268,20 +3279,15 @@ sub GetLineNumbers {
# Pass to addr2line
my $addr2line = $obj_tool_map{"addr2line"};
- GetLineNumbersViaAddr2Line("$addr2line -f -C -e $image",
- $pclist,
- $symbols);
-
- # If the executable is an MS Windows PDB-format executable, we'll
- # have set up obj_tool_map{"addr2line_pdb"}. In this case, we
- # actually want to use both unix addr2line and windows-specific
- # addr2line_pdb, since PDB-format executables can apparently include
- # dwarf .o files.
+ my @addr2line_commands = ("$addr2line -f -C -e $image");
if (exists $obj_tool_map{"addr2line_pdb"}) {
my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"};
- GetLineNumbersViaAddr2Line("$addr2line_pdb --demangle -f -C -e $image",
- $pclist,
- $symbols);
+ push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image");
+ }
+ foreach my $addr2line_command (@addr2line_commands) {
+ if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) {
+ last;
+ }
}
}
@@ -3550,16 +3556,33 @@ sub GetProcedureBoundaries {
my $cppfilt = $obj_tool_map{"c++filt"};
# nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
- # binary doesn't support --demangle. For the first, we try with -D
- # to at least get *exported* symbols. For the second, we use c++filt
- # instead of --demangle. (c++filt is less reliable though, because it
- # might interpret nm meta-data as c++ symbols and try to demangle it :-/)
- my @nm_commands = ("$nm -n --demangle $image 2>/dev/null",
- "$nm -n $image 2>&1 | $cppfilt",
- "$nm -D -n --demangle $image 2>/dev/null",
- "$nm -D -n $image 2>&1 | $cppfilt",
- "$nm -n $image 2>/dev/null",
- "$nm -D -n $image 2>/dev/null");
+ # binary doesn't support --demangle. In addition, for OS X we need
+ # to use the -f flag to get 'flat' nm output (otherwise we don't sort
+ # properly and get incorrect results). Unfortunately, GNU nm uses -f
+ # in an incompatible way. So first we test whether our nm supports
+ # --demangle and -f.
+ my $demangle_flag = "";
+ my $cppfilt_flag = "";
+ if (system("$nm --demangle $image >/dev/null 2>&1") == 0) {
+ # In this mode, we do "nm --demangle <foo>"
+ $demangle_flag = "--demangle";
+ $cppfilt_flag = "";
+ } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) {
+ # In this mode, we do "nm <foo> | c++filt"
+ $cppfilt_flag = " | $cppfilt";
+ };
+ my $flatten_flag = "";
+ if (system("$nm -f $image >/dev/null 2>&1") == 0) {
+ $flatten_flag = "-f";
+ }
+
+  # Finally, in the case $image isn't a debug library, we try again with
+ # -D to at least get *exported* symbols. If we can't use --demangle,
+ # we use c++filt instead, if it exists on this system.
+ my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
+ " $image 2>/dev/null $cppfilt_flag",
+ "$nm -D -n $flatten_flag $demangle_flag" .
+ " $image 2>/dev/null $cppfilt_flag");
# If the executable is an MS Windows PDB-format executable, we'll
# have set up obj_tool_map("nm_pdb"). In this case, we actually
# want to use both unix nm and windows-specific nm_pdb, since
diff --git a/src/profile-handler.cc b/src/profile-handler.cc
new file mode 100644
index 0000000..0a9f54c
--- /dev/null
+++ b/src/profile-handler.cc
@@ -0,0 +1,498 @@
+// Copyright (c) 2009, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+// Nabeel Mian
+//
+// Implements management of profile timers and the corresponding signal handler.
+
+#include "config.h"
+#include "profile-handler.h"
+
+#if !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/time.h>
+
+#include <list>
+#include <string>
+
+#include "base/dynamic_annotations.h"
+#include "base/logging.h"
+#include "base/spinlock.h"
+#include "maybe_threads.h"
+
+using std::list;
+using std::string;
+
+// This structure is used by ProfileHandlerRegisterCallback and
+// ProfileHandlerUnregisterCallback as a handle to a registered callback.
+struct ProfileHandlerToken {
+ // Sets the callback and associated arg.
+ ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg)
+ : callback(cb),
+ callback_arg(cb_arg) {
+ }
+
+ // Callback function to be invoked on receiving a profile timer interrupt.
+ ProfileHandlerCallback callback;
+ // Argument for the callback function.
+ void* callback_arg;
+};
+
+// This class manages profile timers and associated signal handler. This is a
+// singleton.
+class ProfileHandler {
+ public:
+ // Registers the current thread with the profile handler. On systems which
+ // have a separate interval timer for each thread, this function starts the
+ // timer for the current thread.
+ //
+ // The function also attempts to determine whether or not timers are shared by
+ // all threads in the process. (With LinuxThreads, and with NPTL on some
+ // Linux kernel versions, each thread has separate timers.)
+ //
+ // Prior to determining whether timers are shared, this function will
+ // unconditionally start the timer. However, if this function determines
+ // that timers are shared, then it will stop the timer if no callbacks are
+ // currently registered.
+ void RegisterThread();
+
+ // Registers a callback routine to receive profile timer ticks. The returned
+ // token is to be used when unregistering this callback and must not be
+ // deleted by the caller. Registration of the first callback enables the
+ // SIGPROF handler.
+ ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback,
+ void* callback_arg);
+
+ // Unregisters a previously registered callback. Expects the token returned
+ // by the corresponding RegisterCallback routine. Unregistering the last
+ // callback disables the SIGPROF handler.
+ void UnregisterCallback(ProfileHandlerToken* token);
+
+ // Unregisters all the callbacks, stops the timer if shared, disables the
+ // SIGPROF handler and clears the timer_sharing_ state.
+ void Reset();
+
+ // Gets the current state of profile handler.
+ void GetState(ProfileHandlerState* state);
+
+ // Initializes and returns the ProfileHandler singleton.
+ static ProfileHandler* Instance();
+
+ private:
+ ProfileHandler();
+ ~ProfileHandler();
+
+ // Largest allowed frequency.
+ static const int32 kMaxFrequency = 4000;
+ // Default frequency.
+ static const int32 kDefaultFrequency = 100;
+
+ // ProfileHandler singleton.
+ static ProfileHandler* instance_;
+
+ // pthread_once_t for one time initialization of ProfileHandler singleton.
+ static pthread_once_t once_;
+
+ // Initializes the ProfileHandler singleton via GoogleOnceInit.
+ static void Init();
+
+ // Counts the number of SIGPROF interrupts received.
+ int64 interrupts_ GUARDED_BY(signal_lock_);
+
+ // SIGPROF interrupt frequency, read-only after construction.
+ int32 frequency_;
+
+ // Counts the number of callbacks registered.
+ int32 callback_count_ GUARDED_BY(control_lock_);
+
+ // Whether or not the threading system provides interval timers that are
+ // shared by all threads in a process.
+ enum {
+ // No timer initialization attempted yet.
+ TIMERS_UNTOUCHED,
+ // First thread has registered and set timer.
+ TIMERS_ONE_SET,
+ // Timers are shared by all threads.
+ TIMERS_SHARED,
+ // Timers are separate in each thread.
+ TIMERS_SEPARATE
+ } timer_sharing_ GUARDED_BY(control_lock_);
+
+ // This lock serializes the registration of threads and protects the
+ // callbacks_ list below.
+ // Locking order:
+ // In the context of a signal handler, acquire signal_lock_ to walk the
+ // callback list. Otherwise, acquire control_lock_, disable the signal
+ // handler and then acquire signal_lock_.
+ SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_);
+ SpinLock signal_lock_;
+
+ // Holds the list of registered callbacks. We expect the list to be pretty
+ // small. Currently, the cpu profiler (base/profiler) and thread module
+ // (base/thread.h) are the only two components registering callbacks.
+ // Following are the locking requirements for callbacks_:
+ // For read-write access outside the SIGPROF handler:
+ // - Acquire control_lock_
+ // - Disable SIGPROF handler.
+ // - Acquire signal_lock_
+ // For read-only access in the context of SIGPROF handler
+ // (Read-write access is *not allowed* in the SIGPROF handler)
+ // - Acquire signal_lock_
+ // For read-only access outside SIGPROF handler:
+ // - Acquire control_lock_
+ typedef list<ProfileHandlerToken*> CallbackList;
+ typedef CallbackList::iterator CallbackIterator;
+ CallbackList callbacks_ GUARDED_BY(signal_lock_);
+
+ // Starts the interval timer. If the thread library shares timers between
+ // threads, this function starts the shared timer. Otherwise, this will start
+ // the timer in the current thread.
+ void StartTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
+
+ // Stops the interval timer. If the thread library shares timers between
+  // threads, this function stops the shared timer. Otherwise, this will stop
+ // the timer in the current thread.
+ void StopTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
+
+ // Returns true if the profile interval timer is enabled in the current
+ // thread. This actually checks the kernel's interval timer setting. (It is
+ // used to detect whether timers are shared or separate.)
+ bool IsTimerRunning() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
+
+ // Sets the timer interrupt signal handler.
+ void EnableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
+
+ // Disables (ignores) the timer interrupt signal.
+ void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
+
+ // SIGPROF handler. Iterate over and call all the registered callbacks.
+ static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext);
+
+ DISALLOW_EVIL_CONSTRUCTORS(ProfileHandler);
+};
+
+ProfileHandler* ProfileHandler::instance_ = NULL;
+pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT;
+
+const int32 ProfileHandler::kMaxFrequency;
+const int32 ProfileHandler::kDefaultFrequency;
+
+// If we are LD_PRELOAD-ed against a non-pthreads app, then
+// pthread_once won't be defined. We declare it here, for that
+// case (with weak linkage) which will cause the non-definition to
+// resolve to NULL. We can then check for NULL or not in Instance.
+#ifndef __THROW // I guess we're not on a glibc system
+# define __THROW // __THROW is just an optimization, so ok to make it ""
+#endif
+extern "C" int pthread_once(pthread_once_t *, void (*)(void))
+ __THROW ATTRIBUTE_WEAK;
+
+void ProfileHandler::Init() {
+ instance_ = new ProfileHandler();
+}
+
+ProfileHandler* ProfileHandler::Instance() {
+ if (pthread_once) {
+ pthread_once(&once_, Init);
+ }
+ if (instance_ == NULL) {
+ // This will be true on systems that don't link in pthreads,
+ // including on FreeBSD where pthread_once has a non-zero address
+ // (but doesn't do anything) even when pthreads isn't linked in.
+ Init();
+ assert(instance_ != NULL);
+ }
+ return instance_;
+}
+
+ProfileHandler::ProfileHandler()
+ : interrupts_(0),
+ callback_count_(0),
+ timer_sharing_(TIMERS_UNTOUCHED) {
+ SpinLockHolder cl(&control_lock_);
+ // Get frequency of interrupts (if specified)
+ char junk;
+ const char* fr = getenv("CPUPROFILE_FREQUENCY");
+ if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) &&
+ (frequency_ > 0)) {
+ // Limit to kMaxFrequency
+ frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_;
+ } else {
+ frequency_ = kDefaultFrequency;
+ }
+
+ // Ignore signals until we decide to turn profiling on. (Paranoia;
+ // should already be ignored.)
+ DisableHandler();
+}
+
+ProfileHandler::~ProfileHandler() {
+ Reset();
+}
+
+void ProfileHandler::RegisterThread() {
+ SpinLockHolder cl(&control_lock_);
+
+ // We try to detect whether timers are being shared by setting a
+ // timer in the first call to this function, then checking whether
+ // it's set in the second call.
+ //
+ // Note that this detection method requires that the first two calls
+ // to RegisterThread must be made from different threads. (Subsequent
+ // calls will see timer_sharing_ set to either TIMERS_SEPARATE or
+ // TIMERS_SHARED, and won't try to detect the timer sharing type.)
+ //
+ // Also note that if timer settings were inherited across new thread
+ // creation but *not* shared, this approach wouldn't work. That's
+ // not an issue for any Linux threading implementation, and should
+ // not be a problem for a POSIX-compliant threads implementation.
+ switch (timer_sharing_) {
+ case TIMERS_UNTOUCHED:
+ StartTimer();
+ timer_sharing_ = TIMERS_ONE_SET;
+ break;
+ case TIMERS_ONE_SET:
+ // If the timer is running, that means that the main thread's
+ // timer setup is seen in this (second) thread -- and therefore
+ // that timers are shared.
+ if (IsTimerRunning()) {
+ timer_sharing_ = TIMERS_SHARED;
+ // If callback is already registered, we have to keep the timer
+ // running. If not, we disable the timer here.
+ if (callback_count_ == 0) {
+ StopTimer();
+ }
+ } else {
+ timer_sharing_ = TIMERS_SEPARATE;
+ StartTimer();
+ }
+ break;
+ case TIMERS_SHARED:
+ // Nothing needed.
+ break;
+ case TIMERS_SEPARATE:
+ StartTimer();
+ break;
+ }
+}
+
+ProfileHandlerToken* ProfileHandler::RegisterCallback(
+ ProfileHandlerCallback callback, void* callback_arg) {
+ ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg);
+
+ SpinLockHolder cl(&control_lock_);
+ DisableHandler();
+ {
+ SpinLockHolder sl(&signal_lock_);
+ callbacks_.push_back(token);
+ }
+ // Start the timer if timer is shared and this is a first callback.
+ if ((callback_count_ == 0) && (timer_sharing_ == TIMERS_SHARED)) {
+ StartTimer();
+ }
+ ++callback_count_;
+ EnableHandler();
+ return token;
+}
+
+void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) {
+ SpinLockHolder cl(&control_lock_);
+ for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end();
+ ++it) {
+ if ((*it) == token) {
+ RAW_CHECK(callback_count_ > 0, "Invalid callback count");
+ DisableHandler();
+ {
+ SpinLockHolder sl(&signal_lock_);
+ delete *it;
+ callbacks_.erase(it);
+ }
+ --callback_count_;
+ if (callback_count_ > 0) {
+ EnableHandler();
+ } else if (timer_sharing_ == TIMERS_SHARED) {
+ StopTimer();
+ }
+ return;
+ }
+ }
+ // Unknown token.
+ RAW_LOG(FATAL, "Invalid token");
+}
+
+void ProfileHandler::Reset() {
+ SpinLockHolder cl(&control_lock_);
+ DisableHandler();
+ {
+ SpinLockHolder sl(&signal_lock_);
+ CallbackIterator it = callbacks_.begin();
+ while (it != callbacks_.end()) {
+ CallbackIterator tmp = it;
+ ++it;
+ delete *tmp;
+ callbacks_.erase(tmp);
+ }
+ }
+ callback_count_ = 0;
+ if (timer_sharing_ == TIMERS_SHARED) {
+ StopTimer();
+ }
+ timer_sharing_ = TIMERS_UNTOUCHED;
+}
+
+void ProfileHandler::GetState(ProfileHandlerState* state) {
+ SpinLockHolder cl(&control_lock_);
+ DisableHandler();
+ {
+ SpinLockHolder sl(&signal_lock_); // Protects interrupts_.
+ state->interrupts = interrupts_;
+ }
+ if (callback_count_ > 0) {
+ EnableHandler();
+ }
+ state->frequency = frequency_;
+ state->callback_count = callback_count_;
+}
+
+void ProfileHandler::StartTimer() {
+ struct itimerval timer;
+ timer.it_interval.tv_sec = 0;
+ timer.it_interval.tv_usec = 1000000 / frequency_;
+ timer.it_value = timer.it_interval;
+ setitimer(ITIMER_PROF, &timer, 0);
+}
+
+void ProfileHandler::StopTimer() {
+ struct itimerval timer;
+ memset(&timer, 0, sizeof timer);
+ setitimer(ITIMER_PROF, &timer, 0);
+}
+
+bool ProfileHandler::IsTimerRunning() {
+ struct itimerval current_timer;
+ RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer");
+ return (current_timer.it_value.tv_sec != 0 ||
+ current_timer.it_value.tv_usec != 0);
+}
+
+void ProfileHandler::EnableHandler() {
+ struct sigaction sa;
+ sa.sa_sigaction = SignalHandler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (enable)");
+}
+
+void ProfileHandler::DisableHandler() {
+ struct sigaction sa;
+ sa.sa_handler = SIG_IGN;
+ sa.sa_flags = SA_RESTART;
+ sigemptyset(&sa.sa_mask);
+ RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (disable)");
+}
+
+void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) {
+ int saved_errno = errno;
+ RAW_CHECK(instance_ != NULL, "ProfileHandler is not initialized");
+ {
+ SpinLockHolder sl(&instance_->signal_lock_);
+ ++instance_->interrupts_;
+ for (CallbackIterator it = instance_->callbacks_.begin();
+ it != instance_->callbacks_.end();
+ ++it) {
+ (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg);
+ }
+ }
+ errno = saved_errno;
+}
+
+// The sole purpose of this class is to initialize the ProfileHandler singleton
+// when the global static objects are created. Note that the main thread will
+// be registered at this time.
+class ProfileHandlerInitializer {
+ public:
+ ProfileHandlerInitializer() {
+ ProfileHandler::Instance()->RegisterThread();
+ }
+
+ private:
+ DISALLOW_EVIL_CONSTRUCTORS(ProfileHandlerInitializer);
+};
+// ProfileHandlerInitializer singleton
+static ProfileHandlerInitializer profile_handler_initializer;
+
+extern "C" void ProfileHandlerRegisterThread() {
+ ProfileHandler::Instance()->RegisterThread();
+}
+
+extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback(
+ ProfileHandlerCallback callback, void* callback_arg) {
+ return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg);
+}
+
+extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) {
+ ProfileHandler::Instance()->UnregisterCallback(token);
+}
+
+extern "C" void ProfileHandlerReset() {
+ return ProfileHandler::Instance()->Reset();
+}
+
+extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) {
+ ProfileHandler::Instance()->GetState(state);
+}
+
+#else // OS_CYGWIN
+
+// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't
+// work as well for profiling, and also interferes with alarm(). Because of
+// these issues, unless a specific need is identified, profiler support is
+// disabled under Cygwin.
+extern "C" void ProfileHandlerRegisterThread() {
+}
+
+extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback(
+ ProfileHandlerCallback callback, void* callback_arg) {
+ return NULL;
+}
+
+extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) {
+}
+
+extern "C" void ProfileHandlerReset() {
+}
+
+extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) {
+}
+
+#endif // OS_CYGWIN
diff --git a/src/profile-handler.h b/src/profile-handler.h
new file mode 100644
index 0000000..1cbe253
--- /dev/null
+++ b/src/profile-handler.h
@@ -0,0 +1,147 @@
+/* Copyright (c) 2009, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Nabeel Mian
+ *
+ * This module manages the cpu profile timers and the associated interrupt
+ * handler. When enabled, all registered threads in the program are profiled.
+ * (Note: if using linux 2.4 or earlier, you must use the Thread class, in
+ * google3/thread, to ensure all threads are profiled.)
+ *
+ * Any component interested in receiving a profile timer interrupt can do so by
+ * registering a callback. All registered callbacks must be async-signal-safe.
+ *
+ * Note: This module requires the sole ownership of ITIMER_PROF timer and the
+ * SIGPROF signal.
+ */
+
+#ifndef BASE_PROFILE_HANDLER_H_
+#define BASE_PROFILE_HANDLER_H_
+
+#include "config.h"
+#include <signal.h>
+#ifdef COMPILER_MSVC
+#include "conflict-signal.h"
+#endif
+#include "base/basictypes.h"
+
+/* All this code should be usable from within C apps. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declaration. */
+struct ProfileHandlerToken;
+
+/*
+ * Callback function to be used with ProfileHandlerRegisterCallback. This
+ * function will be called in the context of SIGPROF signal handler and must
+ * be async-signal-safe. The first three arguments are the values provided by
+ * the SIGPROF signal handler. We use void* to avoid using ucontext_t on
+ * non-POSIX systems.
+ *
+ * Requirements:
+ * - Callback must be async-signal-safe.
+ * - None of the functions in ProfileHandler are async-signal-safe. Therefore,
+ * callback function *must* not call any of the ProfileHandler functions.
+ * - Callback is not required to be re-entrant. At most one instance of
+ * callback can run at a time.
+ *
+ * Notes:
+ * - The SIGPROF signal handler saves and restores errno, so the callback
+ * doesn't need to.
+ * - Callback code *must* not acquire lock(s) to serialize access to data shared
+ * with the code outside the signal handler (callback must be
+ * async-signal-safe). If such a serialization is needed, follow the model
+ * used by profiler.cc:
+ *
+ * When code other than the signal handler modifies the shared data it must:
+ * - Acquire lock.
+ * - Unregister the callback with the ProfileHandler.
+ * - Modify shared data.
+ * - Re-register the callback.
+ * - Release lock.
+ * and the callback code gets a lockless, read-write access to the data.
+ */
+typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info,
+ void* ucontext, void* callback_arg);
+
+/*
+ * Registers a new thread with profile handler and should be called only once
+ * per thread. The main thread is registered at program startup. This routine
+ * is called by the Thread module in google3/thread whenever a new thread is
+ * created. This function is not async-signal-safe.
+ */
+void ProfileHandlerRegisterThread();
+
+/*
+ * Registers a callback routine. This callback function will be called in the
+ * context of SIGPROF handler, so must be async-signal-safe. The returned token
+ * is to be used when unregistering this callback via
+ * ProfileHandlerUnregisterCallback. Registering the first callback enables
+ * the SIGPROF signal handler. Caller must not free the returned token. This
+ * function is not async-signal-safe.
+ */
+ProfileHandlerToken* ProfileHandlerRegisterCallback(
+ ProfileHandlerCallback callback, void* callback_arg);
+
+/*
+ * Unregisters a previously registered callback. Expects the token returned
+ * by the corresponding ProfileHandlerRegisterCallback and asserts that the
+ * passed token is valid. Unregistering the last callback disables the SIGPROF
+ * signal handler. It waits for the currently running callback to
+ * complete before returning. This function is not async-signal-safe.
+ */
+void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token);
+
+/*
+ * FOR TESTING ONLY
+ * Unregisters all the callbacks, stops the timers (if shared) and disables the
+ * SIGPROF handler. All the threads, including the main thread, need to be
+ * re-registered after this call. This function is not async-signal-safe.
+ */
+void ProfileHandlerReset();
+
+/*
+ * Stores profile handler's current state. This function is not
+ * async-signal-safe.
+ */
+struct ProfileHandlerState {
+ int32 frequency; /* Profiling frequency */
+ int32 callback_count; /* Number of callbacks registered */
+ int64 interrupts; /* Number of interrupts received */
+};
+void ProfileHandlerGetState(struct ProfileHandlerState* state);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* BASE_PROFILE_HANDLER_H_ */
diff --git a/src/profiledata.cc b/src/profiledata.cc
index e622b28..873100e 100644
--- a/src/profiledata.cc
+++ b/src/profiledata.cc
@@ -190,13 +190,25 @@ void ProfileData::Stop() {
// Dump "/proc/self/maps" so we get list of mapped shared libraries
DumpProcSelfMaps(out_);
- close(out_);
+ Reset();
fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n",
count_, evictions_, total_bytes_);
+}
+
+void ProfileData::Reset() {
+ if (!enabled()) {
+ return;
+ }
+
+ // Don't reset count_, evictions_, or total_bytes_ here. They're used
+ // by Stop to print information about the profile after reset, and are
+ // cleared by Start when starting a new profile.
+ close(out_);
delete[] hash_;
hash_ = 0;
delete[] evict_;
evict_ = 0;
+ num_evicted_ = 0;
free(fname_);
fname_ = 0;
start_time_ = 0;
diff --git a/src/profiledata.h b/src/profiledata.h
index 008c8a4..29bc1b7 100644
--- a/src/profiledata.h
+++ b/src/profiledata.h
@@ -60,11 +60,11 @@
// - 'Add' may be called from asynchronous signals, but is not
// re-entrant.
//
-// - None of 'Start', 'Stop', 'Flush', and 'Add' may be called at the
-// same time.
+// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be
+// called at the same time.
//
-// - 'Start' and 'Stop' should not be called while 'Enabled' or
-// 'GetCurrent' are running, and vice versa.
+// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled'
+// or 'GetCurrent' are running, and vice versa.
//
// A profiler which uses asyncronous signals to add samples will
// typically use two locks to protect this data structure:
@@ -72,7 +72,7 @@
// - A SpinLock which is held over all calls except for the 'Add'
// call made from the signal handler.
//
-// - A SpinLock which is held over calls to 'Start', 'Stop',
+// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset',
// 'Flush', and 'Add'. (This SpinLock should be acquired after
// the first SpinLock in all cases where both are needed.)
class ProfileData {
@@ -118,6 +118,10 @@ class ProfileData {
// data to disk.
void Stop();
+ // Stop data collection without writing anything else to disk, and
+ // discard any collected data.
+ void Reset();
+
// If data collection is enabled, record a sample with 'depth'
// entries from 'stack'. (depth must be > 0.) At most
// kMaxStackDepth stack entries will be recorded, starting with
diff --git a/src/profiler.cc b/src/profiler.cc
index 8675348..c51c7b2 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -58,15 +58,13 @@ typedef int ucontext_t; // just to quiet the compiler, mostly
#include "base/spinlock.h"
#include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */
#include "profiledata.h"
+#include "profile-handler.h"
#ifdef HAVE_CONFLICT_SIGNAL_H
#include "conflict-signal.h" /* used on msvc machines */
#endif
using std::string;
-DEFINE_string(cpu_profile, "",
- "Profile file name (used if CPUPROFILE env var not specified)");
-
// Collects up all profile data. This is a singleton, which is
// initialized by a constructor at startup.
class CpuProfiler {
@@ -87,94 +85,40 @@ class CpuProfiler {
void GetCurrentState(ProfilerState* state);
- // Register the current thread with the profiler. This should be
- // called only once per thread.
- //
- // The profiler attempts to determine whether or not timers are
- // shared by all threads in the process. (With LinuxThreads, and
- // with NPTL on some Linux kernel versions, each thread has separate
- // timers.)
- //
- // On systems which have a separate interval timer for each thread,
- // this function starts the timer for the current thread. Profiling
- // is disabled by ignoring the resulting signals, and enabled by
- // setting their handler to be prof_handler.
- //
- // Prior to determining whether timers are shared, this function
- // will unconditionally start the timer. However, if this function
- // determines that timers are shared, then it will stop the timer if
- // profiling is not currently enabled.
- void RegisterThread();
-
static CpuProfiler instance_;
private:
- static const int kMaxFrequency = 4000; // Largest allowed frequency
- static const int kDefaultFrequency = 100; // Default frequency
-
- // Sample frequency, read-only after construction.
- int frequency_;
-
- // These locks implement the locking requirements described in the
- // ProfileData documentation, specifically:
- //
- // control_lock_ is held all over all collector_ method calls except for
- // the 'Add' call made from the signal handler, to protect against
- // concurrent use of collector_'s control routines.
+ // This lock implements the locking requirements described in the ProfileData
+ // documentation, specifically:
//
- // signal_lock_ is held over calls to 'Start', 'Stop', 'Flush', and
- // 'Add', to protect against concurrent use of data collection and
- // writing routines. Code other than the signal handler must disable
- // the timer signal while holding signal_lock, to prevent deadlock.
- //
- // Locking order is control_lock_ first, and then signal_lock_.
- // signal_lock_ is acquired by the prof_handler without first
- // acquiring control_lock_.
- SpinLock control_lock_;
- SpinLock signal_lock_;
+ // lock_ is held all over all collector_ method calls except for the 'Add'
+ // call made from the signal handler, to protect against concurrent use of
+ // collector_'s control routines. Code other than signal handler must
+ // unregister the signal handler before calling any collector_ method.
+ // 'Add' method in the collector is protected by a guarantee from
+  // ProfileHandler that only one instance of prof_handler can run at a time.
+ SpinLock lock_;
ProfileData collector_;
- // Filter function and its argument, if any. (NULL means include
- // all samples). Set at start, read-only while running. Written
- // while holding both control_lock_ and signal_lock_, read and
- // executed under signal_lock_.
+ // Filter function and its argument, if any. (NULL means include all
+ // samples). Set at start, read-only while running. Written while holding
+ // lock_, read and executed in the context of SIGPROF interrupt.
int (*filter_)(void*);
void* filter_arg_;
- // Whether or not the threading system provides interval timers
- // that are shared by all threads in a process.
- enum {
- TIMERS_UNTOUCHED, // No timer initialization attempted yet.
- TIMERS_ONE_SET, // First thread has registered and set timer.
- TIMERS_SHARED, // Timers are shared by all threads.
- TIMERS_SEPARATE // Timers are separate in each thread.
- } timer_sharing_;
-
- // Start the interval timer used for profiling. If the thread
- // library shares timers between threads, this is used to enable and
- // disable the timer when starting and stopping profiling. If
- // timers are not shared, this is used to enable the timer in each
- // thread.
- void StartTimer();
-
- // Stop the interval timer used for profiling. Used only if the
- // thread library shares timers between threads.
- void StopTimer();
-
- // Returns true if the profiling interval timer enabled in the
- // current thread. This actually checks the kernel's interval timer
- // setting. (It is used to detect whether timers are shared or
- // separate.)
- bool IsTimerRunning();
-
- // Sets the timer interrupt signal handler to one that stores the pc.
- static void EnableHandler();
-
- // Disables (ignores) the timer interrupt signal.
- static void DisableHandler();
-
- // Signale handler that records the interrupted pc in the profile data
- static void prof_handler(int sig, siginfo_t*, void* signal_ucontext);
+  // Opaque token returned by the profile handler. To be used when calling
+ // ProfileHandlerUnregisterCallback.
+ ProfileHandlerToken* prof_handler_token_;
+
+ // Sets up a callback to receive SIGPROF interrupt.
+ void EnableHandler();
+
+ // Disables receiving SIGPROF interrupt.
+ void DisableHandler();
+
+ // Signal handler that records the interrupted pc in the profile data.
+ static void prof_handler(int sig, siginfo_t*, void* signal_ucontext,
+ void* cpu_profiler);
};
// Profile data structure singleton: Constructor will check to see if
@@ -184,25 +128,10 @@ CpuProfiler CpuProfiler::instance_;
// Initialize profiling: activated if getenv("CPUPROFILE") exists.
CpuProfiler::CpuProfiler()
- : timer_sharing_(TIMERS_UNTOUCHED) {
- // Get frequency of interrupts (if specified)
- char junk;
- const char* fr = getenv("CPUPROFILE_FREQUENCY");
- if (fr != NULL && (sscanf(fr, "%d%c", &frequency_, &junk) == 1) &&
- (frequency_ > 0)) {
- // Limit to kMaxFrequency
- frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_;
- } else {
- frequency_ = kDefaultFrequency;
- }
-
- // Ignore signals until we decide to turn profiling on. (Paranoia;
- // should already be ignored.)
- DisableHandler();
-
- RegisterThread();
-
- // Should profiling be enabled automatically at start?
+ : prof_handler_token_(NULL) {
+  // TODO(cgd) Move this code *out* of the CpuProfiler constructor into a
+ // separate object responsible for initialization. With ProfileHandler there
+ // is no need to limit the number of profilers.
char fname[PATH_MAX];
if (!GetUniquePathFromEnv("CPUPROFILE", fname)) {
return;
@@ -219,41 +148,26 @@ CpuProfiler::CpuProfiler()
}
}
-bool CpuProfiler::Start(const char* fname,
- const ProfilerOptions* options) {
- SpinLockHolder cl(&control_lock_);
+bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) {
+ SpinLockHolder cl(&lock_);
if (collector_.enabled()) {
return false;
}
- {
- // spin lock really is needed to protect init here, since it's
- // conceivable that prof_handler may still be running from a
- // previous profiler run. (For instance, if prof_handler just
- // started, had not grabbed the spinlock, then was switched out,
- // it might start again right now.) Any such late sample will be
- // recorded against the new profile, but there's no harm in that.
- SpinLockHolder sl(&signal_lock_);
-
- ProfileData::Options collector_options;
- collector_options.set_frequency(frequency_);
- if (!collector_.Start(fname, collector_options)) {
- return false;
- }
-
- filter_ = NULL;
- if (options != NULL && options->filter_in_thread != NULL) {
- filter_ = options->filter_in_thread;
- filter_arg_ = options->filter_in_thread_arg;
- }
-
- // Must unlock before setting prof_handler to avoid deadlock
- // with signal delivered to this thread.
+ ProfileHandlerState prof_handler_state;
+ ProfileHandlerGetState(&prof_handler_state);
+
+ ProfileData::Options collector_options;
+ collector_options.set_frequency(prof_handler_state.frequency);
+ if (!collector_.Start(fname, collector_options)) {
+ return false;
}
- if (timer_sharing_ == TIMERS_SHARED) {
- StartTimer();
+ filter_ = NULL;
+ if (options != NULL && options->filter_in_thread != NULL) {
+ filter_ = options->filter_in_thread;
+ filter_arg_ = options->filter_in_thread_arg;
}
// Setup handler for SIGPROF interrupts
@@ -268,55 +182,48 @@ CpuProfiler::~CpuProfiler() {
// Stop profiling and write out any collected profile data
void CpuProfiler::Stop() {
- SpinLockHolder cl(&control_lock_);
+ SpinLockHolder cl(&lock_);
if (!collector_.enabled()) {
return;
}
- // Ignore timer signals. Note that the handler may have just
- // started and might not have taken signal_lock_ yet. Holding
- // signal_lock_ below along with the semantics of collector_.Add()
- // (which does nothing if collection is not enabled) prevents that
- // late sample from causing a problem.
+ // Unregister prof_handler to stop receiving SIGPROF interrupts before
+ // stopping the collector.
DisableHandler();
- if (timer_sharing_ == TIMERS_SHARED) {
- StopTimer();
- }
-
- {
- SpinLockHolder sl(&signal_lock_);
- collector_.Stop();
- }
+ // DisableHandler waits for the currently running callback to complete and
+ // guarantees no future invocations. It is safe to stop the collector.
+ collector_.Stop();
}
void CpuProfiler::FlushTable() {
- SpinLockHolder cl(&control_lock_);
+ SpinLockHolder cl(&lock_);
if (!collector_.enabled()) {
return;
}
- // Disable timer signal while holding signal_lock_, to prevent deadlock
- // if we take a timer signal while flushing.
+ // Unregister prof_handler to stop receiving SIGPROF interrupts before
+ // flushing the profile data.
DisableHandler();
- {
- SpinLockHolder sl(&signal_lock_);
- collector_.FlushTable();
- }
+
+ // DisableHandler waits for the currently running callback to complete and
+ // guarantees no future invocations. It is safe to flush the profile data.
+ collector_.FlushTable();
+
EnableHandler();
}
bool CpuProfiler::Enabled() {
- SpinLockHolder cl(&control_lock_);
+ SpinLockHolder cl(&lock_);
return collector_.enabled();
}
void CpuProfiler::GetCurrentState(ProfilerState* state) {
ProfileData::State collector_state;
{
- SpinLockHolder cl(&control_lock_);
+ SpinLockHolder cl(&lock_);
collector_.GetCurrentState(&collector_state);
}
@@ -328,141 +235,56 @@ void CpuProfiler::GetCurrentState(ProfilerState* state) {
state->profile_name[buf_size-1] = '\0';
}
-void CpuProfiler::RegisterThread() {
- SpinLockHolder cl(&control_lock_);
-
- // We try to detect whether timers are being shared by setting a
- // timer in the first call to this function, then checking whether
- // it's set in the second call.
- //
- // Note that this detection method requires that the first two calls
- // to RegisterThread must be made from different threads. (Subsequent
- // calls will see timer_sharing_ set to either TIMERS_SEPARATE or
- // TIMERS_SHARED, and won't try to detect the timer sharing type.)
- //
- // Also note that if timer settings were inherited across new thread
- // creation but *not* shared, this approach wouldn't work. That's
- // not an issue for any Linux threading implementation, and should
- // not be a problem for a POSIX-compliant threads implementation.
- switch (timer_sharing_) {
- case TIMERS_UNTOUCHED:
- StartTimer();
- timer_sharing_ = TIMERS_ONE_SET;
- break;
- case TIMERS_ONE_SET:
- // If the timer is running, that means that the main thread's
- // timer setup is seen in this (second) thread -- and therefore
- // that timers are shared.
- if (IsTimerRunning()) {
- timer_sharing_ = TIMERS_SHARED;
- // If profiling has already been enabled, we have to keep the
- // timer running. If not, we disable the timer here and
- // re-enable it in start.
- if (!collector_.enabled()) {
- StopTimer();
- }
- } else {
- timer_sharing_ = TIMERS_SEPARATE;
- StartTimer();
- }
- break;
- case TIMERS_SHARED:
- // Nothing needed.
- break;
- case TIMERS_SEPARATE:
- StartTimer();
- break;
- }
-}
-
-void CpuProfiler::StartTimer() {
- // TODO: Randomize the initial interrupt value?
- // TODO: Randomize the inter-interrupt period on every interrupt?
- struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 1000000 / frequency_;
- timer.it_value = timer.it_interval;
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-void CpuProfiler::StopTimer() {
- struct itimerval timer;
- memset(&timer, 0, sizeof timer);
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-bool CpuProfiler::IsTimerRunning() {
- itimerval current_timer;
- RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer failed");
- return (current_timer.it_value.tv_sec != 0 ||
- current_timer.it_value.tv_usec != 0);
-}
-
void CpuProfiler::EnableHandler() {
- struct sigaction sa;
- sa.sa_sigaction = prof_handler;
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered");
+ prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this);
+ RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler");
}
void CpuProfiler::DisableHandler() {
- struct sigaction sa;
- sa.sa_handler = SIG_IGN;
- sa.sa_flags = SA_RESTART;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered");
+ ProfileHandlerUnregisterCallback(prof_handler_token_);
+ prof_handler_token_ = NULL;
}
-// Signal handler that records the pc in the profile-data structure
-//
-// NOTE: it is possible for profiling to be disabled just as this
-// signal handler starts, before signal_lock_ is acquired. Therefore,
-// collector_.Add must check whether profiling is enabled before
-// trying to record any data. (See also comments in Start and Stop.)
-void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext) {
- int saved_errno = errno;
-
- // Hold the spin lock while we're gathering the trace because there's
- // no real harm in holding it and there's little point in releasing
- // and re-acquiring it. (We'll only be blocking Start, Stop, and
- // Flush.) We make sure to release it before restoring errno.
- {
- SpinLockHolder sl(&instance_.signal_lock_);
-
- if (instance_.filter_ == NULL ||
- (*instance_.filter_)(instance_.filter_arg_)) {
- void* stack[ProfileData::kMaxStackDepth];
-
- // The top-most active routine doesn't show up as a normal
- // frame, but as the "pc" value in the signal handler context.
- stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext));
-
- // We skip the top two stack trace entries (this function and one
- // signal handler frame) since they are artifacts of profiling and
- // should not be measured. Other profiling related frames may be
- // removed by "pprof" at analysis time. Instead of skipping the top
- // frames, we could skip nothing, but that would increase the
- // profile size unnecessarily.
- int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1,
- 2, signal_ucontext);
- depth++; // To account for pc value in stack[0];
-
- instance_.collector_.Add(depth, stack);
- }
+// Signal handler that records the pc in the profile-data structure. We do no
+// synchronization here. profile-handler.cc guarantees that at most one
+// instance of prof_handler() will run at a time. All other routines that
+// access the data touched by prof_handler() disable this signal handler before
+// accessing the data and therefore cannot execute concurrently with
+// prof_handler().
+void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext,
+ void* cpu_profiler) {
+ CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler);
+
+ if (instance->filter_ == NULL ||
+ (*instance->filter_)(instance->filter_arg_)) {
+ void* stack[ProfileData::kMaxStackDepth];
+
+ // The top-most active routine doesn't show up as a normal
+ // frame, but as the "pc" value in the signal handler context.
+ stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext));
+
+ // We skip the top two stack trace entries (this function and one
+ // signal handler frame) since they are artifacts of profiling and
+ // should not be measured. Other profiling related frames may be
+ // removed by "pprof" at analysis time. Instead of skipping the top
+ // frames, we could skip nothing, but that would increase the
+ // profile size unnecessarily.
+ int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1,
+ 2, signal_ucontext);
+    depth++;  // To account for pc value in stack[0].
+
+ instance->collector_.Add(depth, stack);
}
-
- errno = saved_errno;
}
+#if !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+
extern "C" void ProfilerRegisterThread() {
- CpuProfiler::instance_.RegisterThread();
+ ProfileHandlerRegisterThread();
}
-// DEPRECATED routines
-extern "C" void ProfilerEnable() { }
-extern "C" void ProfilerDisable() { }
-
extern "C" void ProfilerFlush() {
CpuProfiler::instance_.FlushTable();
}
@@ -488,9 +310,27 @@ extern "C" void ProfilerGetCurrentState(ProfilerState* state) {
CpuProfiler::instance_.GetCurrentState(state);
}
+#else // OS_CYGWIN
-REGISTER_MODULE_INITIALIZER(profiler, {
- if (!FLAGS_cpu_profile.empty()) {
- ProfilerStart(FLAGS_cpu_profile.c_str());
- }
-});
+// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't
+// work as well for profiling, and also interferes with alarm(). Because of
+// these issues, unless a specific need is identified, profiler support is
+// disabled under Cygwin.
+extern "C" void ProfilerRegisterThread() { }
+extern "C" void ProfilerFlush() { }
+extern "C" int ProfilingIsEnabledForAllThreads() { return 0; }
+extern "C" int ProfilerStart(const char* fname) { return 0; }
+extern "C" int ProfilerStartWithOptions(const char *fname,
+ const ProfilerOptions *options) {
+ return 0;
+}
+extern "C" void ProfilerStop() { }
+extern "C" void ProfilerGetCurrentState(ProfilerState* state) {
+ memset(state, 0, sizeof(*state));
+}
+
+#endif // OS_CYGWIN
+
+// DEPRECATED routines
+extern "C" void ProfilerEnable() { }
+extern "C" void ProfilerDisable() { }
diff --git a/src/sampler.cc b/src/sampler.cc
index dda225c..a11b893 100755
--- a/src/sampler.cc
+++ b/src/sampler.cc
@@ -39,7 +39,7 @@
using std::min;
-// Twice the approximate gap between sampling actions.
+// The approximate gap in bytes between sampling actions.
// I.e., we take one sample approximately once every
// tcmalloc_sample_parameter bytes of allocation
// i.e. about once every 512KB.
@@ -59,9 +59,9 @@ namespace tcmalloc {
// Statics for Sampler
double Sampler::log_table_[1<<kFastlogNumBits];
-// Populate the lookup table for FastLog2
-// The approximates the log2 curve with a step function
-// Steps have height equal to log2 of the mid-point of the step
+// Populate the lookup table for FastLog2.
+// This approximates the log2 curve with a step function.
+// Steps have height equal to log2 of the mid-point of the step.
void Sampler::PopulateFastLog2Table() {
for (int i = 0; i < (1<<kFastlogNumBits); i++) {
log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits))
@@ -101,32 +101,30 @@ void Sampler::InitStatics() {
// This is done by generating a random number between 0 and 1 and applying
// the inverse cumulative distribution function for an exponential.
// Specifically: Let m be the inverse of the sample period, then
-// p = 1 - exp(mx)
-// q = exp(mx)
-// log_e(q) = mx
-// log_e(q)/m = x
-// log_2(q) / (log_e(2) / m) = x
-// The value (log_e(2) / m) is precomputed
-// and may also be approximated for large sampler periods by
-// 1.0 / log2(1.0-1.0/(sample_period_));
-// In the code, q is actually in the range 1 to 2**26, hence the -26
+// the probability distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below
size_t Sampler::PickNextSamplingPoint() {
- double sample_scaling = - log(2.0) * FLAGS_tcmalloc_sample_parameter;
rnd_ = NextRandom(rnd_);
// Take the top 26 bits as the random number
- // (This plus the 1<<26 sampling bound give a max step possible of
- // 1209424308 bytes.)
+ // (This plus the 1<<58 sampling bound give a max possible step of
+ // 5194297183973780480 bytes.)
const uint64_t prng_mod_power = 48; // Number of bits in prng
// The uint32_t cast is to prevent a (hard-to-reproduce) NAN
// under piii debug for some binaries.
double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0;
- // Put the computed p-value through the CDF of a geometric
+ // Put the computed p-value through the CDF of a geometric.
// For faster performance (save ~1/20th exec time), replace
- // min(FastLog2(q) - 26,0) by (Fastlog2(q) - 26.000705)
+// min(0.0, FastLog2(q) - 26) by (FastLog2(q) - 26.000705)
// The value 26.000705 is used rather than 26 to compensate
// for inaccuracies in FastLog2 which otherwise result in a
// negative answer.
- return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * sample_scaling + 1);
+ return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0)
+ * FLAGS_tcmalloc_sample_parameter) + 1);
}
} // namespace tcmalloc
diff --git a/src/stacktrace_win32-inl.h b/src/stacktrace_win32-inl.h
index a717714..26ae297 100644
--- a/src/stacktrace_win32-inl.h
+++ b/src/stacktrace_win32-inl.h
@@ -62,7 +62,7 @@ typedef USHORT NTAPI RtlCaptureStackBackTrace_Function(
// to worry about someone else holding the loader's lock.
static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn =
(RtlCaptureStackBackTrace_Function*)
- GetProcAddress(GetModuleHandle("ntdll.dll"), "RtlCaptureStackBackTrace");
+ GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace");
int GetStackTrace(void** result, int max_depth, int skip_count) {
if (!RtlCaptureStackBackTrace_fn) {
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h
index 902806d..9f68a03 100644
--- a/src/stacktrace_x86-inl.h
+++ b/src/stacktrace_x86-inl.h
@@ -243,7 +243,7 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
// last two pages in the address space
if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
#endif
-#if !defined(_WIN32)
+#ifdef HAVE_MMAP
if (!STRICT_UNWINDING) {
// Lax sanity checks cause a crash on AMD-based machines with
// VDSO-enabled kernels.
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index ca88b91..e5022e3 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -131,6 +131,10 @@
#include "tcmalloc_guard.h"
#include "thread_cache.h"
+#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
+# define WIN32_DO_PATCHING 1
+#endif
+
using tcmalloc::PageHeap;
using tcmalloc::PageHeapAllocator;
using tcmalloc::SizeMap;
@@ -171,7 +175,8 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold,
"messages. This bounds the amount of extra logging "
"generated by this flag. Default value of this flag "
"is very large and therefore you should see no extra "
- "logging unless the flag is overridden.");
+ "logging unless the flag is overridden. Set to 0 to "
+ "disable reporting entirely.");
// These routines are called by free(), realloc(), etc. if the pointer is
// invalid. This is a cheap (source-editing required) kind of exception
@@ -181,12 +186,6 @@ void InvalidFree(void* ptr) {
CRASH("Attempt to free invalid pointer: %p\n", ptr);
}
-void* InvalidRealloc(void* old_ptr, size_t new_size) {
- CRASH("Attempt to realloc invalid pointer: %p (realloc to %" PRIuS ")\n",
- old_ptr, new_size);
- return NULL;
-}
-
size_t InvalidGetSizeForRealloc(void* old_ptr) {
CRASH("Attempt to realloc invalid pointer: %p\n", old_ptr);
return 0;
@@ -492,7 +491,8 @@ TCMallocGuard::TCMallocGuard() {
// Check whether the kernel also supports TLS (needs to happen at runtime)
tcmalloc::CheckIfKernelSupportsTLS();
#endif
-#ifdef _WIN32 // patch the windows VirtualAlloc, etc.
+#ifdef WIN32_DO_PATCHING
+ // patch the windows VirtualAlloc, etc.
PatchWindowsFunctions(); // defined in windows/patch_functions.cc
#endif
free(malloc(1));
@@ -512,7 +512,9 @@ TCMallocGuard::~TCMallocGuard() {
}
}
}
+#ifndef WIN32_OVERRIDE_ALLOCATORS
static TCMallocGuard module_enter_exit_hook;
+#endif
//-------------------------------------------------------------------
// Helpers for the exported routines below
@@ -578,8 +580,8 @@ static void ReportLargeAlloc(Length num_pages, void* result) {
static const int N = 1000;
char buffer[N];
TCMalloc_Printer printer(buffer, N);
- printer.printf("tcmalloc: large alloc %lld bytes == %p @ ",
- static_cast<long long>(num_pages) << kPageShift,
+ printer.printf("tcmalloc: large alloc %llu bytes == %p @ ",
+ static_cast<unsigned long long>(num_pages) << kPageShift,
result);
for (int i = 0; i < stack.depth; i++) {
printer.printf(" %p", stack.stack[i]);
@@ -598,7 +600,7 @@ inline void* do_malloc_pages(Length num_pages) {
SpinLockHolder h(Static::pageheap_lock());
span = Static::pageheap()->New(num_pages);
const int64 threshold = large_alloc_threshold;
- if (num_pages >= (threshold >> kPageShift)) {
+ if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
// Increase the threshold by 1/8 every time we generate a report.
// We cap the threshold at 8GB to avoid overflow problems.
large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
@@ -716,7 +718,7 @@ inline size_t GetSizeWithCallback(void* ptr,
return Static::sizemap()->ByteSizeForClass(cl);
} else {
Span *span = Static::pageheap()->GetDescriptor(p);
- if (span == NULL) { // means we do now own this memory
+ if (span == NULL) { // means we do not own this memory
return (*invalid_getsize_fn)(ptr);
} else if (span->sizeclass != 0) {
Static::pageheap()->CacheSizeClass(p, span->sizeclass);
@@ -729,12 +731,12 @@ inline size_t GetSizeWithCallback(void* ptr,
// This lets you call back to a given function pointer if ptr is invalid.
// It is used primarily by windows code which wants a specialized callback.
-inline void* do_realloc_with_callback(void* old_ptr, size_t new_size,
- void* (*invalid_realloc_fn)(void*,
- size_t)) {
+inline void* do_realloc_with_callback(
+ void* old_ptr, size_t new_size,
+ void (*invalid_free_fn)(void*),
+ size_t (*invalid_get_size_fn)(void*)) {
// Get the size of the old entry
- const size_t old_size = GetSizeWithCallback(old_ptr,
- &InvalidGetSizeForRealloc);
+ const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
// Reallocate if the new size is larger than the old size,
// or if the new size is significantly smaller than the old size.
@@ -764,7 +766,7 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size,
// We could use a variant of do_free() that leverages the fact
// that we already know the sizeclass of old_ptr. The benefit
// would be small, so don't bother.
- do_free(old_ptr);
+ do_free_with_callback(old_ptr, invalid_free_fn);
return new_ptr;
} else {
// We still need to call hooks to report the updated size:
@@ -775,7 +777,8 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size,
}
inline void* do_realloc(void* old_ptr, size_t new_size) {
- return do_realloc_with_callback(old_ptr, new_size, &InvalidRealloc);
+ return do_realloc_with_callback(old_ptr, new_size,
+ &InvalidFree, &InvalidGetSizeForRealloc);
}
// For use by exported routines below that want specific alignments
@@ -893,7 +896,7 @@ static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
inline void* cpp_alloc(size_t size, bool nothrow) {
for (;;) {
void* p = do_malloc(size);
-#ifdef PREANSINEW
+#if defined(PREANSINEW) || (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
return p;
#else
if (p == NULL) { // allocation failed
@@ -939,7 +942,7 @@ size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
// Exported routines
//-------------------------------------------------------------------
-#ifndef _WIN32 // windows doesn't allow overriding; use the do_* fns instead
+#ifndef WIN32_DO_PATCHING
// CAVEAT: The code structure below ensures that MallocHook methods are always
// called from the stack frame of the invoked allocation function.
@@ -1198,4 +1201,4 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller)
}
void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
-#endif // #ifndef _WIN32
+#endif // #ifndef WIN32_DO_PATCHING
diff --git a/src/tests/frag_unittest.cc b/src/tests/frag_unittest.cc
index c257c7d..08494b4 100644
--- a/src/tests/frag_unittest.cc
+++ b/src/tests/frag_unittest.cc
@@ -35,10 +35,13 @@
#include "config_for_unittests.h"
#include <stdlib.h>
#include <stdio.h>
-#ifndef _WIN32
+#ifdef HAVE_SYS_RESOURCE_H
#include <sys/time.h> // for struct timeval
#include <sys/resource.h> // for getrusage
#endif
+#ifdef _WIN32
+#include <windows.h> // for GetTickCount()
+#endif
#include <vector>
#include "base/logging.h"
#include <google/malloc_extension.h>
@@ -80,12 +83,14 @@ int main(int argc, char** argv) {
// Now do timing tests
for (int i = 0; i < 5; i++) {
static const int kIterations = 100000;
-#ifdef _WIN32
- long long int tv_start = GetTickCount();
-#else
+#ifdef HAVE_SYS_RESOURCE_H
struct rusage r;
getrusage(RUSAGE_SELF, &r); // figure out user-time spent on this
struct timeval tv_start = r.ru_utime;
+#elif defined(_WIN32)
+ long long int tv_start = GetTickCount();
+#else
+# error No way to calculate time on your system
#endif
for (int i = 0; i < kIterations; i++) {
@@ -94,16 +99,18 @@ int main(int argc, char** argv) {
&s);
}
-#ifdef _WIN32
+#ifdef HAVE_SYS_RESOURCE_H
+ getrusage(RUSAGE_SELF, &r);
+ struct timeval tv_end = r.ru_utime;
+ int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec;
+ int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec;
+#elif defined(_WIN32)
long long int tv_end = GetTickCount();
int64 sumsec = (tv_end - tv_start) / 1000;
// Resolution in windows is only to the millisecond, alas
int64 sumusec = ((tv_end - tv_start) % 1000) * 1000;
#else
- getrusage(RUSAGE_SELF, &r);
- struct timeval tv_end = r.ru_utime;
- int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec;
- int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec;
+# error No way to calculate time on your system
#endif
fprintf(stderr, "getproperty: %6.1f ns/call\n",
(sumsec * 1e9 + sumusec * 1e3) / kIterations);
diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc
new file mode 100644
index 0000000..d780aac
--- /dev/null
+++ b/src/tests/profile-handler_unittest.cc
@@ -0,0 +1,445 @@
+// Copyright 2009 Google Inc. All Rights Reserved.
+// Author: Nabeel Mian (nabeelmian@google.com)
+// Chris Demetriou (cgd@google.com)
+//
+// This file contains the unit tests for profile-handler.h interface.
+
+#include "config.h"
+#include "profile-handler.h"
+
+#include <assert.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "base/logging.h"
+#include "base/simple_mutex.h"
+
+// Some helpful macros for the test class
+#define EXPECT_TRUE(cond) CHECK(cond)
+#define EXPECT_FALSE(cond) CHECK(!(cond))
+#define EXPECT_EQ(a, b) CHECK_EQ(a, b)
+#define EXPECT_NE(a, b) CHECK_NE(a, b)
+#define EXPECT_GT(a, b) CHECK_GT(a, b)
+#define EXPECT_LT(a, b) CHECK_LT(a, b)
+#define EXPECT_GE(a, b) CHECK_GE(a, b)
+#define EXPECT_LE(a, b) CHECK_LE(a, b)
+#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0)
+#define TEST_F(cls, fn) void cls :: fn()
+
+namespace {
+
+// TODO(csilvers): error-checking on the pthreads routines
+class Thread {
+ public:
+ Thread() : joinable_(false) { }
+ void SetJoinable(bool value) { joinable_ = value; }
+ void Start() {
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setdetachstate(&attr, joinable_ ? PTHREAD_CREATE_JOINABLE
+ : PTHREAD_CREATE_DETACHED);
+ pthread_create(&thread_, &attr, &DoRun, this);
+ pthread_attr_destroy(&attr);
+ }
+ void Join() {
+ assert(joinable_);
+ pthread_join(thread_, NULL);
+ }
+ virtual void Run() = 0;
+ private:
+ static void* DoRun(void* cls) {
+ ProfileHandlerRegisterThread();
+ reinterpret_cast<Thread*>(cls)->Run();
+ return NULL;
+ }
+ pthread_t thread_;
+ bool joinable_;
+};
+
+// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy
+// load, this is set to 20x the ProfileHandler timer interval (i.e. 100Hz)
+// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate
+// enough cpu usage to get a profile tick.
+int kSleepInterval = 200000;
+
+// Whether each thread has separate timers.
+static bool timer_separate_ = false;
+
+// Checks whether the profile timer is enabled for the current thread.
+bool IsTimerEnabled() {
+ itimerval current_timer;
+ EXPECT_EQ(0, getitimer(ITIMER_PROF, &current_timer));
+ return (current_timer.it_value.tv_sec != 0 ||
+ current_timer.it_value.tv_usec != 0);
+}
+
+class VirtualTimerGetterThread : public Thread {
+ public:
+ VirtualTimerGetterThread() {
+ memset(&virtual_timer_, 0, sizeof virtual_timer_);
+ }
+ struct itimerval virtual_timer_;
+
+ private:
+ void Run() {
+ CHECK_EQ(0, getitimer(ITIMER_VIRTUAL, &virtual_timer_));
+ }
+};
+
+// This function checks whether the timers are shared between threads. This
+// function spawns a thread, so use it carefully when testing thread-dependent
+// behaviour.
+static bool threads_have_separate_timers() {
+ struct itimerval new_timer_val;
+
+ // Enable the virtual timer in the current thread.
+ memset(&new_timer_val, 0, sizeof new_timer_val);
+ new_timer_val.it_value.tv_sec = 1000000; // seconds
+ CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL));
+
+ // Spawn a thread, get the virtual timer's value there.
+ VirtualTimerGetterThread thread;
+ thread.SetJoinable(true);
+ thread.Start();
+ thread.Join();
+
+ // Disable timer here.
+ memset(&new_timer_val, 0, sizeof new_timer_val);
+ CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL));
+
+ bool target_timer_enabled = (thread.virtual_timer_.it_value.tv_sec != 0 ||
+ thread.virtual_timer_.it_value.tv_usec != 0);
+ if (!target_timer_enabled) {
+ LOG(INFO, "threads have separate timers");
+ return true;
+ } else {
+ LOG(INFO, "threads have shared timers");
+ return false;
+ }
+}
+
+// Dummy worker thread to accumulate cpu time.
+class BusyThread : public Thread {
+ public:
+ BusyThread() : stop_work_(false) {
+ }
+
+ // Setter/Getters
+ bool stop_work() {
+ MutexLock lock(&mu_);
+ return stop_work_;
+ }
+ void set_stop_work(bool stop_work) {
+ MutexLock lock(&mu_);
+ stop_work_ = stop_work;
+ }
+
+ private:
+ // Protects stop_work_ below.
+ Mutex mu_;
+ // Whether to stop work?
+ bool stop_work_;
+
+ // Do work until asked to stop.
+ void Run() {
+ while (!stop_work()) {
+ }
+ // If timers are separate, check that timer is enabled for this thread.
+ EXPECT_TRUE(!timer_separate_ || IsTimerEnabled());
+ }
+};
+
+class NullThread : public Thread {
+ private:
+ void Run() {
+ // If timers are separate, check that timer is enabled for this thread.
+ EXPECT_TRUE(!timer_separate_ || IsTimerEnabled());
+ }
+};
+
+// Signal handler which tracks the profile timer ticks.
+static void TickCounter(int sig, siginfo_t* sig_info, void *vuc,
+ void* tick_counter) {
+ int* counter = static_cast<int*>(tick_counter);
+ ++(*counter);
+}
+
+// This class tests the profile-handler.h interface.
+class ProfileHandlerTest {
+ protected:
+
+ // Determines whether threads have separate timers.
+ static void SetUpTestCase() {
+ timer_separate_ = threads_have_separate_timers();
+ }
+
+ // Sets up the profile timers and SIGPROF handler in a known state. It does
+ // the following:
+ // 1. Unregisters all the callbacks, stops the timer (if shared) and
+ // clears out timer_sharing state in the ProfileHandler. This clears
+ // out any state left behind by the previous test or during module
+ // initialization when the test program was started.
+ // 2. Spawns two threads which will be registered with the ProfileHandler.
+ // At this time ProfileHandler knows if the timers are shared.
+ // 3. Starts a busy worker thread to accumulate CPU usage.
+ virtual void SetUp() {
+ // Reset the state of ProfileHandler between each test. This unregisters
+ // all callbacks, stops timer (if shared) and clears timer sharing state.
+ ProfileHandlerReset();
+ EXPECT_EQ(GetCallbackCount(), 0);
+ VerifyDisabled();
+    // ProfileHandler requires at least two threads to be registered to determine
+ // whether timers are shared.
+ RegisterThread();
+ RegisterThread();
+ // Now that two threads are started, verify that the signal handler is
+ // disabled and the timers are correctly enabled/disabled.
+ VerifyDisabled();
+ // Start worker to accumulate cpu usage.
+ StartWorker();
+ }
+
+ virtual void TearDown() {
+ ProfileHandlerReset();
+ // Stops the worker thread.
+ StopWorker();
+ }
+
+ // Starts a no-op thread that gets registered with the ProfileHandler. Waits
+ // for the thread to stop.
+ void RegisterThread() {
+ NullThread t;
+ t.SetJoinable(true);
+ t.Start();
+ t.Join();
+ }
+
+ // Starts a busy worker thread to accumulate cpu time. There should be only
+ // one busy worker running. This is required for the case where there are
+ // separate timers for each thread.
+ void StartWorker() {
+ busy_worker_ = new BusyThread();
+ busy_worker_->SetJoinable(true);
+ busy_worker_->Start();
+ // Wait for worker to start up and register with the ProfileHandler.
+ // TODO(nabeelmian) This may not work under very heavy load.
+ usleep(kSleepInterval);
+ }
+
+ // Stops the worker thread.
+ void StopWorker() {
+ busy_worker_->set_stop_work(true);
+ busy_worker_->Join();
+ delete busy_worker_;
+ }
+
+ // Checks whether SIGPROF signal handler is enabled.
+ bool IsSignalEnabled() {
+ struct sigaction sa;
+ CHECK_EQ(sigaction(SIGPROF, NULL, &sa), 0);
+ return ((sa.sa_handler == SIG_IGN) || (sa.sa_handler == SIG_DFL)) ?
+ false : true;
+ }
+
+ // Gets the number of callbacks registered with the ProfileHandler.
+ uint32 GetCallbackCount() {
+ ProfileHandlerState state;
+ ProfileHandlerGetState(&state);
+ return state.callback_count;
+ }
+
+ // Gets the current ProfileHandler interrupt count.
+ uint64 GetInterruptCount() {
+ ProfileHandlerState state;
+ ProfileHandlerGetState(&state);
+ return state.interrupts;
+ }
+
+ // Verifies that a callback is correctly registered and receiving
+ // profile ticks.
+ void VerifyRegistration(const int& tick_counter) {
+ // Check the callback count.
+ EXPECT_GT(GetCallbackCount(), 0);
+ // Check that the profile timer is enabled.
+ EXPECT_TRUE(IsTimerEnabled());
+ // Check that the signal handler is enabled.
+ EXPECT_TRUE(IsSignalEnabled());
+ uint64 interrupts_before = GetInterruptCount();
+ // Sleep for a bit and check that tick counter is making progress.
+ int old_tick_count = tick_counter;
+ usleep(kSleepInterval);
+ int new_tick_count = tick_counter;
+ EXPECT_GT(new_tick_count, old_tick_count);
+ uint64 interrupts_after = GetInterruptCount();
+ EXPECT_GT(interrupts_after, interrupts_before);
+ }
+
+ // Verifies that a callback is not receiving profile ticks.
+ void VerifyUnregistration(const int& tick_counter) {
+ // Sleep for a bit and check that tick counter is not making progress.
+ int old_tick_count = tick_counter;
+ usleep(kSleepInterval);
+ int new_tick_count = tick_counter;
+ EXPECT_EQ(new_tick_count, old_tick_count);
+ // If no callbacks, signal handler and shared timer should be disabled.
+ if (GetCallbackCount() == 0) {
+ EXPECT_FALSE(IsSignalEnabled());
+ if (timer_separate_) {
+ EXPECT_TRUE(IsTimerEnabled());
+ } else {
+ EXPECT_FALSE(IsTimerEnabled());
+ }
+ }
+ }
+
+ // Verifies that the SIGPROF interrupt handler is disabled and the timer,
+ // if shared, is disabled. Expects the worker to be running.
+ void VerifyDisabled() {
+ // Check that the signal handler is disabled.
+ EXPECT_FALSE(IsSignalEnabled());
+ // Check that the callback count is 0.
+ EXPECT_EQ(GetCallbackCount(), 0);
+ // Check that the timer is disabled if shared, enabled otherwise.
+ if (timer_separate_) {
+ EXPECT_TRUE(IsTimerEnabled());
+ } else {
+ EXPECT_FALSE(IsTimerEnabled());
+ }
+ // Verify that the ProfileHandler is not accumulating profile ticks.
+ uint64 interrupts_before = GetInterruptCount();
+ usleep(kSleepInterval);
+ uint64 interrupts_after = GetInterruptCount();
+ EXPECT_EQ(interrupts_after, interrupts_before);
+ }
+
+ // Busy worker thread to accumulate cpu usage.
+ BusyThread* busy_worker_;
+
+ private:
+ // The tests to run
+ void RegisterUnregisterCallback();
+ void MultipleCallbacks();
+ void Reset();
+ void RegisterCallbackBeforeThread();
+
+ public:
+#define RUN(test) do { \
+ printf("Running %s\n", #test); \
+ ProfileHandlerTest pht; \
+ pht.SetUp(); \
+ pht.test(); \
+ pht.TearDown(); \
+} while (0)
+
+ static int RUN_ALL_TESTS() {
+ SetUpTestCase();
+ RUN(RegisterUnregisterCallback);
+ RUN(MultipleCallbacks);
+ RUN(Reset);
+ RUN(RegisterCallbackBeforeThread);
+ printf("Done\n");
+ return 0;
+ }
+};
+
+// Verifies ProfileHandlerRegisterCallback and
+// ProfileHandlerUnregisterCallback.
+TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) {
+ int tick_count = 0;
+ ProfileHandlerToken* token = ProfileHandlerRegisterCallback(
+ TickCounter, &tick_count);
+ VerifyRegistration(tick_count);
+ ProfileHandlerUnregisterCallback(token);
+ VerifyUnregistration(tick_count);
+}
+
+// Verifies that multiple callbacks can be registered.
+TEST_F(ProfileHandlerTest, MultipleCallbacks) {
+ // Register first callback.
+ int first_tick_count;
+ ProfileHandlerToken* token1 = ProfileHandlerRegisterCallback(
+ TickCounter, &first_tick_count);
+ // Check that callback was registered correctly.
+ VerifyRegistration(first_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 1);
+
+ // Register second callback.
+ int second_tick_count;
+ ProfileHandlerToken* token2 = ProfileHandlerRegisterCallback(
+ TickCounter, &second_tick_count);
+ // Check that callback was registered correctly.
+ VerifyRegistration(second_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 2);
+
+ // Unregister first callback.
+ ProfileHandlerUnregisterCallback(token1);
+ VerifyUnregistration(first_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 1);
+ // Verify that second callback is still registered.
+ VerifyRegistration(second_tick_count);
+
+ // Unregister second callback.
+ ProfileHandlerUnregisterCallback(token2);
+ VerifyUnregistration(second_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 0);
+
+ // Verify that the signal handler and timers are correctly disabled.
+ VerifyDisabled();
+}
+
+// Verifies ProfileHandlerReset
+TEST_F(ProfileHandlerTest, Reset) {
+ // Verify that the profile timer interrupt is disabled.
+ VerifyDisabled();
+ int first_tick_count;
+ ProfileHandlerRegisterCallback(TickCounter, &first_tick_count);
+ VerifyRegistration(first_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 1);
+
+ // Register second callback.
+ int second_tick_count;
+ ProfileHandlerRegisterCallback(TickCounter, &second_tick_count);
+ VerifyRegistration(second_tick_count);
+ EXPECT_EQ(GetCallbackCount(), 2);
+
+  // Reset the profile handler and verify that callbacks were correctly
+ // unregistered and timer/signal are disabled.
+ ProfileHandlerReset();
+ VerifyUnregistration(first_tick_count);
+ VerifyUnregistration(second_tick_count);
+ VerifyDisabled();
+}
+
+// Verifies that ProfileHandler correctly handles a case where a callback was
+// registered before the second thread started.
+TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) {
+ // Stop the worker.
+ StopWorker();
+ // Unregister all existing callbacks, stop the timer (if shared), disable
+ // the signal handler and reset the timer sharing state in the Profile
+ // Handler.
+ ProfileHandlerReset();
+ EXPECT_EQ(GetCallbackCount(), 0);
+ VerifyDisabled();
+
+ // Start the worker. At this time ProfileHandler doesn't know if timers are
+ // shared as only one thread has registered so far.
+ StartWorker();
+ // Register a callback and check that profile ticks are being delivered.
+ int tick_count;
+ ProfileHandlerRegisterCallback(TickCounter, &tick_count);
+ EXPECT_EQ(GetCallbackCount(), 1);
+ VerifyRegistration(tick_count);
+
+ // Register a second thread and verify that timer and signal handler are
+ // correctly enabled.
+ RegisterThread();
+ EXPECT_EQ(GetCallbackCount(), 1);
+ EXPECT_TRUE(IsTimerEnabled());
+ EXPECT_TRUE(IsSignalEnabled());
+}
+
+} // namespace
+
+int main(int argc, char** argv) {
+ return ProfileHandlerTest::RUN_ALL_TESTS();
+}
diff --git a/src/tests/profiledata_unittest.cc b/src/tests/profiledata_unittest.cc
index 679b9e2..31ba3b6 100644
--- a/src/tests/profiledata_unittest.cc
+++ b/src/tests/profiledata_unittest.cc
@@ -54,6 +54,7 @@ using std::string;
#define EXPECT_TRUE(cond) CHECK(cond)
#define EXPECT_FALSE(cond) CHECK(!(cond))
#define EXPECT_EQ(a, b) CHECK_EQ(a, b)
+#define EXPECT_NE(a, b) CHECK_NE(a, b)
#define EXPECT_GT(a, b) CHECK_GT(a, b)
#define EXPECT_LT(a, b) CHECK_LT(a, b)
#define EXPECT_GE(a, b) CHECK_GE(a, b)
@@ -64,9 +65,41 @@ using std::string;
namespace {
+template<typename T> class scoped_array {
+ public:
+ scoped_array(T* data) : data_(data) { }
+ ~scoped_array() { delete[] data_; }
+ T* get() { return data_; }
+ T& operator[](int i) { return data_[i]; }
+ private:
+ T* const data_;
+};
+
// Re-runs fn until it doesn't cause EINTR.
#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
+// Read up to "count" bytes from file descriptor "fd" into the buffer
+// starting at "buf" while handling short reads and EINTR. On
+// success, return the number of bytes read. Otherwise, return -1.
+static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) {
+ CHECK_GE(fd, 0);
+ char *buf0 = reinterpret_cast<char *>(buf);
+ ssize_t num_bytes = 0;
+ while (num_bytes < count) {
+ ssize_t len;
+ NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes));
+ if (len < 0) { // There was an error other than EINTR.
+ return -1;
+ }
+ if (len == 0) { // Reached EOF.
+ break;
+ }
+ num_bytes += len;
+ }
+ CHECK(num_bytes <= count);
+ return num_bytes;
+}
+
// Thin wrapper around a file descriptor so that the file descriptor
// gets closed for sure.
struct FileDescriptor {
@@ -87,6 +120,9 @@ typedef uintptr_t ProfileDataSlot;
// sample.
inline void* V(intptr_t x) { return reinterpret_cast<void*>(x); }
+// String returned by ProfileDataChecker helper functions to indicate success.
+const char kNoError[] = "";
+
class ProfileDataChecker {
public:
ProfileDataChecker() {
@@ -99,35 +135,194 @@ class ProfileDataChecker {
string filename() const { return filename_; }
- void Check(const ProfileDataSlot* slots, int num_slots) {
- CheckWithSkips(slots, num_slots, NULL, 0);
+ // Checks the first 'num_slots' profile data slots in the file
+ // against the data pointed to by 'slots'. Returns kNoError if the
+ // data matched, otherwise returns an indication of the cause of the
+ // mismatch.
+ string Check(const ProfileDataSlot* slots, int num_slots) {
+ return CheckWithSkips(slots, num_slots, NULL, 0);
}
- void CheckWithSkips(const ProfileDataSlot* slots, int num_slots,
- const int* skips, int num_skips) {
- FileDescriptor fd(open(filename_.c_str(), O_RDONLY));
- CHECK_GE(fd.get(), 0);
-
- ProfileDataSlot* filedata = new ProfileDataSlot[num_slots];
- size_t expected_bytes = num_slots * sizeof filedata[0];
- ssize_t bytes_read = read(fd.get(), filedata, expected_bytes);
- CHECK_EQ(expected_bytes, bytes_read);
-
- for (int i = 0; i < num_slots; i++) {
- if (num_skips > 0 && *skips == i) {
- num_skips--;
- skips++;
- continue;
- }
- CHECK_EQ(slots[i], filedata[i]); // "first mismatch at slot " << i;
- }
- delete[] filedata;
- }
+ // Checks the first 'num_slots' profile data slots in the file
+ // against the data pointed to by 'slots', skipping over entries
+ // described by 'skips' and 'num_skips'.
+ //
+ // 'skips' must be a sorted list of (0-based) slot numbers to be
+ // skipped, of length 'num_skips'. Note that 'num_slots' includes
+ // any skipped slots, i.e., the first 'num_slots' profile data slots
+ // will be considered, but some may be skipped.
+ //
+ // Returns kNoError if the data matched, otherwise returns an
+ // indication of the cause of the mismatch.
+ string CheckWithSkips(const ProfileDataSlot* slots, int num_slots,
+ const int* skips, int num_skips);
+
+ // Validate that a profile is correctly formed. The profile is
+ // assumed to have been created by the same kind of binary (e.g.,
+ // same slot size, same endian, etc.) as is validating the profile.
+ //
+ // Returns kNoError if the profile appears valid, otherwise returns
+ // an indication of the problem with the profile.
+ string ValidateProfile();
private:
string filename_;
};
+string ProfileDataChecker::CheckWithSkips(const ProfileDataSlot* slots,
+ int num_slots, const int* skips,
+ int num_skips) {
+ FileDescriptor fd(open(filename_.c_str(), O_RDONLY));
+ if (fd.get() < 0)
+ return "file open error";
+
+ scoped_array<ProfileDataSlot> filedata(new ProfileDataSlot[num_slots]);
+ size_t expected_bytes = num_slots * sizeof filedata[0];
+ ssize_t bytes_read = ReadPersistent(fd.get(), filedata.get(), expected_bytes);
+ if (expected_bytes != bytes_read)
+ return "file too small";
+
+ for (int i = 0; i < num_slots; i++) {
+ if (num_skips > 0 && *skips == i) {
+ num_skips--;
+ skips++;
+ continue;
+ }
+ if (slots[i] != filedata[i])
+ return "data mismatch";
+ }
+ return kNoError;
+}
+
+string ProfileDataChecker::ValidateProfile() {
+ FileDescriptor fd(open(filename_.c_str(), O_RDONLY));
+ if (fd.get() < 0)
+ return "file open error";
+
+ struct stat statbuf;
+ if (fstat(fd.get(), &statbuf) != 0)
+ return "fstat error";
+ if (statbuf.st_size != static_cast<ssize_t>(statbuf.st_size))
+ return "file impossibly large";
+ ssize_t filesize = statbuf.st_size;
+
+ scoped_array<char> filedata(new char[filesize]);
+ if (ReadPersistent(fd.get(), filedata.get(), filesize) != filesize)
+ return "read of whole file failed";
+
+ // Must have enough data for the header and the trailer.
+ if (filesize < (5 + 3) * sizeof(ProfileDataSlot))
+ return "not enough data in profile for header + trailer";
+
+ // Check the header
+ if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[0] != 0)
+ return "error in header: non-zero count";
+ if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[1] != 3)
+ return "error in header: num_slots != 3";
+ if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[2] != 0)
+ return "error in header: non-zero format version";
+ // Period (slot 3) can have any value.
+ if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[4] != 0)
+ return "error in header: non-zero padding value";
+ ssize_t cur_offset = 5 * sizeof(ProfileDataSlot);
+
+ // While there are samples, skip them. Each sample consists of
+ // at least three slots.
+ bool seen_trailer = false;
+ while (!seen_trailer) {
+ if (cur_offset > filesize - 3 * sizeof(ProfileDataSlot))
+ return "truncated sample header";
+ ProfileDataSlot* sample =
+ reinterpret_cast<ProfileDataSlot*>(filedata.get() + cur_offset);
+ ProfileDataSlot slots_this_sample = 2 + sample[1];
+ ssize_t size_this_sample = slots_this_sample * sizeof(ProfileDataSlot);
+ if (cur_offset > filesize - size_this_sample)
+ return "truncated sample";
+
+ if (sample[0] == 0 && sample[1] == 1 && sample[2] == 0) {
+ seen_trailer = true;
+ } else {
+ if (sample[0] < 1)
+ return "error in sample: sample count < 1";
+ if (sample[1] < 1)
+ return "error in sample: num_pcs < 1";
+ for (int i = 2; i < slots_this_sample; i++) {
+ if (sample[i] == 0)
+ return "error in sample: NULL PC";
+ }
+ }
+ cur_offset += size_this_sample;
+ }
+
+ // There must be at least one line in the (text) list of mapped objects,
+ // and it must be terminated by a newline. Note, the use of newline
+  // here and below might not be reasonable on non-UNIX systems.
+ if (cur_offset >= filesize)
+ return "no list of mapped objects";
+ if (filedata[filesize - 1] != '\n')
+ return "profile did not end with a complete line";
+
+ while (cur_offset < filesize) {
+ char* line_start = filedata.get() + cur_offset;
+
+ // Find the end of the line, and replace it with a NUL for easier
+ // scanning.
+ char* line_end = strchr(line_start, '\n');
+ *line_end = '\0';
+
+ // Advance past any leading space. It's allowed in some lines,
+ // but not in others.
+ bool has_leading_space = false;
+ char* line_cur = line_start;
+ while (*line_cur == ' ') {
+ has_leading_space = true;
+ line_cur++;
+ }
+
+ bool found_match = false;
+
+ // Check for build lines.
+ if (!found_match) {
+ found_match = (strncmp(line_cur, "build=", 6) == 0);
+ // Anything may follow "build=", and leading space is allowed.
+ }
+
+ // A line from ProcMapsIterator::FormatLine, of the form:
+ //
+ // 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so
+ //
+ // Leading space is not allowed. The filename may be omitted or
+ // may consist of multiple words, so we scan only up to the
+ // space before the filename.
+ if (!found_match) {
+ int chars_scanned = -1;
+ sscanf(line_cur, "%*x-%*x %*c%*c%*c%*c %*x %*x:%*x %*d %n",
+ &chars_scanned);
+ found_match = (chars_scanned > 0 && !has_leading_space);
+ }
+
+ // A line from DumpAddressMap, of the form:
+ //
+ // 40000000-40015000: /lib/ld-2.3.2.so
+ //
+ // Leading space is allowed. The filename may be omitted or may
+ // consist of multiple words, so we scan only up to the space
+ // before the filename.
+ if (!found_match) {
+ int chars_scanned = -1;
+ sscanf(line_cur, "%*x-%*x: %n", &chars_scanned);
+ found_match = (chars_scanned > 0);
+ }
+
+ if (!found_match)
+ return "unrecognized line in text section";
+
+ cur_offset += (line_end - line_start) + 1;
+ }
+
+ return kNoError;
+}
+
class ProfileDataTest {
protected:
void ExpectStopped() {
@@ -162,6 +357,7 @@ class ProfileDataTest {
void CollectOne();
void CollectTwoMatching();
void CollectTwoFlush();
+ void StartResetRestart();
public:
#define RUN(test) do { \
@@ -178,6 +374,7 @@ class ProfileDataTest {
RUN(CollectOne);
RUN(CollectTwoMatching);
RUN(CollectTwoFlush);
+ RUN(StartResetRestart);
return 0;
}
};
@@ -226,7 +423,8 @@ TEST_F(ProfileDataTest, StartStopEmpty) {
ExpectRunningSamples(0);
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
// Start and Stop with no options, collecting no samples. Verify
@@ -246,8 +444,10 @@ TEST_F(ProfileDataTest, StartStopNoOptionsEmpty) {
ExpectRunningSamples(0);
collector_.Stop();
ExpectStopped();
- checker_.CheckWithSkips(slots, arraysize(slots),
- slots_to_skip, arraysize(slots_to_skip));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.CheckWithSkips(slots, arraysize(slots),
+ slots_to_skip,
+ arraysize(slots_to_skip)));
}
// Start after already started. Should return false and not impact
@@ -275,7 +475,8 @@ TEST_F(ProfileDataTest, StartWhenStarted) {
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
// Like StartStopEmpty, but uses a different file name and frequency.
@@ -293,7 +494,8 @@ TEST_F(ProfileDataTest, StartStopEmpty2) {
ExpectRunningSamples(0);
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
TEST_F(ProfileDataTest, CollectOne) {
@@ -316,7 +518,8 @@ TEST_F(ProfileDataTest, CollectOne) {
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
TEST_F(ProfileDataTest, CollectTwoMatching) {
@@ -341,7 +544,8 @@ TEST_F(ProfileDataTest, CollectTwoMatching) {
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
TEST_F(ProfileDataTest, CollectTwoFlush) {
@@ -370,7 +574,41 @@ TEST_F(ProfileDataTest, CollectTwoFlush) {
collector_.Stop();
ExpectStopped();
- checker_.Check(slots, arraysize(slots));
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
+}
+
+// Start then reset, verify that the result is *not* a valid profile.
+// Then start again and make sure the result is OK.
+TEST_F(ProfileDataTest, StartResetRestart) {
+ ExpectStopped();
+ ProfileData::Options options;
+ options.set_frequency(1);
+ EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options));
+ ExpectRunningSamples(0);
+ collector_.Reset();
+ ExpectStopped();
+ // We expect the resulting file to be empty. This is a minimal test
+ // of ValidateProfile.
+ EXPECT_NE(kNoError, checker_.ValidateProfile());
+
+ struct stat statbuf;
+ EXPECT_EQ(0, stat(checker_.filename().c_str(), &statbuf));
+ EXPECT_EQ(0, statbuf.st_size);
+
+ const int frequency = 2; // Different frequency than used above.
+ ProfileDataSlot slots[] = {
+ 0, 3, 0, 1000000 / frequency, 0, // binary header
+ 0, 1, 0 // binary trailer
+ };
+
+ options.set_frequency(frequency);
+ EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options));
+ ExpectRunningSamples(0);
+ collector_.Stop();
+ ExpectStopped();
+ EXPECT_EQ(kNoError, checker_.ValidateProfile());
+ EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots)));
}
} // namespace
diff --git a/src/tests/sampling_test.cc b/src/tests/sampling_test.cc
index 6845574..b75e70e 100644
--- a/src/tests/sampling_test.cc
+++ b/src/tests/sampling_test.cc
@@ -64,7 +64,7 @@ int main(int argc, char** argv) {
fprintf(stderr, "USAGE: %s <base of output files>\n", argv[0]);
exit(1);
}
- for (int i = 0; i < 9000; i++) {
+ for (int i = 0; i < 8000; i++) {
AllocateAllocate();
}
diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh
index 9e45f67..149d27b 100755
--- a/src/tests/sampling_test.sh
+++ b/src/tests/sampling_test.sh
@@ -52,7 +52,15 @@ OUTDIR="/tmp/sampling_test_dir"
# libtool is annoying, and puts the actual executable in a different
# directory, replacing the seeming-executable with a shell script.
# We use the error output of sampling_test to indicate its real location
-SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '{print $2; exit;}'`
+SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '/USAGE/ {print $2; exit;}'`
+
+# A kludge for cygwin. Unfortunately, 'test -f' says that 'foo' exists
+# even when it doesn't, and only foo.exe exists. Other unix utilities
+# (like nm) need you to say 'foo.exe'. We use one such utility, cat, to
+# see what the *real* binary name is.
+if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then
+ SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe
+fi
die() {
echo "FAILED"
@@ -66,20 +74,20 @@ rm -rf "$OUTDIR" || die "Unable to delete $OUTDIR"
mkdir "$OUTDIR" || die "Unable to create $OUTDIR"
# This puts the output into out.heap and out.growth. It allocates
-# 9*10^7 bytes of memory, which is 85M. Because we sample, the
+# 8*10^7 bytes of memory, which is 76M. Because we sample, the
# estimate may be a bit high or a bit low: we accept anything from
-# 70M to 99M.
+# 50M to 99M.
"$SAMPLING_TEST" "$OUTDIR/out"
echo -n "Testing heap output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \
- | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
+ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
|| die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"`
echo "OK"
echo -n "Testing growth output..."
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \
- | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
+ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \
|| die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"`
echo "OK"
diff --git a/src/tests/testutil.cc b/src/tests/testutil.cc
index 6643443..f2b8592 100644
--- a/src/tests/testutil.cc
+++ b/src/tests/testutil.cc
@@ -99,7 +99,9 @@ extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int) {
#elif defined(_WIN32)
+#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */
+#endif
#include <windows.h>
extern "C" {
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index d2b0c4f..a1fdf0f 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -489,16 +489,23 @@ void ThreadCache::RecomputePerThreadCacheSize() {
double ratio = space / max<double>(1, per_thread_cache_size_);
size_t claimed = 0;
for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
- // Don't circumvent the slow-start growth of max_size_ by increasing
- // the total cache size.
- if (!use_dynamic_cache_size_ || ratio < 1.0) {
- h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
+ if (use_dynamic_cache_size_) {
+ // Don't circumvent the slow-start growth of max_size_ by increasing the
+ // total cache size.
+ if (ratio < 1.0) {
+ h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
+ }
+ } else {
+ // Don't try to be clever and multiply by 'ratio' because rounding
+ // errors will eventually cause long-lived threads to have zero
+ // max_size_.
+ h->max_size_ = space;
}
claimed += h->max_size_;
}
unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
per_thread_cache_size_ = space;
- //MESSAGE("Threads %d => cache size %8d\n", n, int(space));
+ // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space));
}
void ThreadCache::Print(TCMalloc_Printer* out) const {
diff --git a/src/windows/addr2line-pdb.c b/src/windows/addr2line-pdb.c
index 5384731..97b614b 100644
--- a/src/windows/addr2line-pdb.c
+++ b/src/windows/addr2line-pdb.c
@@ -45,7 +45,6 @@
#include <windows.h>
#include <dbghelp.h>
-
#define SEARCH_CAP (1024*1024)
#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"
diff --git a/src/windows/config.h b/src/windows/config.h
index 6be561e..2811296 100644
--- a/src/windows/config.h
+++ b/src/windows/config.h
@@ -12,6 +12,13 @@
#ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_
#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_
+/* define this if you are linking tcmalloc statically and overriding the
+ * default allocators.
+ * For instructions on how to use this mode, see
+ * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b
+ */
+#undef WIN32_OVERRIDE_ALLOCATORS
+
/* the location of <hash_map> */
#define HASH_MAP_H <hash_map>
@@ -21,6 +28,9 @@
/* the location of <hash_set> */
#define HASH_SET_H <hash_set>
+/* Define to 1 if your libc has a snprintf implementation */
+#undef HAVE_SNPRINTF
+
/* Define to 1 if compiler supports __builtin_stack_pointer */
#undef HAVE_BUILTIN_STACK_POINTER
@@ -162,9 +172,6 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H
-/* Define to 1 if you have the <windows.h> header file. */
-#define HAVE_WINDOWS_H 1
-
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__
@@ -177,6 +184,9 @@
/* Define to 1 if int32_t is equivalent to intptr_t */
#undef INT32_EQUALS_INTPTR
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+
/* Name of package */
#undef PACKAGE
diff --git a/src/windows/mingw.h b/src/windows/mingw.h
index 1745723..e69b5da 100644
--- a/src/windows/mingw.h
+++ b/src/windows/mingw.h
@@ -47,6 +47,8 @@
#include "windows/port.h"
+#define HAVE_SNPRINTF 1
+
#endif /* __MINGW32__ */
#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */
diff --git a/src/windows/nm-pdb.c b/src/windows/nm-pdb.c
index ec0ddf9..726d345 100644
--- a/src/windows/nm-pdb.c
+++ b/src/windows/nm-pdb.c
@@ -41,6 +41,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h> // for _strdup
#include <windows.h>
#include <dbghelp.h>
diff --git a/src/windows/override_functions.cc b/src/windows/override_functions.cc
new file mode 100644
index 0000000..2ad6bbc
--- /dev/null
+++ b/src/windows/override_functions.cc
@@ -0,0 +1,118 @@
+// Copyright (c) 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// ---
+// Author: Mike Belshe
+//
+// To link tcmalloc into an EXE or DLL statically without using the patching
+// facility, we can take a stock libcmt and remove all the allocator functions.
+// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few
+// functions are missing. This file contains the additional overrides which
+// are required in the VS2005 libcmt in order to link the modified libcmt.
+//
+// See also
+// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b
+
+#include "config.h"
+
+#ifndef _WIN32
+# error You should only be including this file in a windows environment!
+#endif
+
+#ifndef WIN32_OVERRIDE_ALLOCATORS
+# error This file is intended for use when overriding allocators
+#endif
+
+#include "tcmalloc.cc"
+
+extern "C" void* _recalloc(void* p, size_t n, size_t size) {
+ void* result = realloc(p, n * size);
+ memset(result, 0, n * size);
+ return result;
+}
+
+extern "C" void* _calloc_impl(size_t n, size_t size) {
+ return calloc(n, size);
+}
+
+extern "C" size_t _msize(void* p) {
+ return MallocExtension::instance()->GetAllocatedSize(p);
+}
+
+extern "C" intptr_t _get_heap_handle() {
+ return 0;
+}
+
+// The CRT heap initialization stub.
+extern "C" int _heap_init() {
+  // We intentionally leak this object.  It lasts for the process
+  // lifetime.  Trying to tear down at _heap_term() is so late that
+  // you can't do anything useful anyway.
+ new TCMallocGuard();
+ return 1;
+}
+
+// The CRT heap cleanup stub.
+extern "C" void _heap_term() {
+}
+
+#ifndef NDEBUG
+#undef malloc
+#undef free
+#undef calloc
+int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) {
+ return 0;
+}
+
+int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) {
+ return 0;
+}
+
+int _CrtSetReportMode(int, int) {
+ return 0;
+}
+
+extern "C" void* _malloc_dbg(size_t size, int , const char*, int) {
+ return malloc(size);
+}
+
+extern "C" void _free_dbg(void* ptr, int) {
+ free(ptr);
+}
+
+extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) {
+ return calloc(n, size);
+}
+#endif // NDEBUG
+
+// We set this to 1 because part of the CRT uses a check of _crtheap != 0
+// to test whether the CRT has been initialized. Once we've ripped out
+// the allocators from libcmt, we need to provide this definition so that
+// the rest of the CRT is still usable.
+extern "C" void* _crtheap = reinterpret_cast<void*>(1);
diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc
index d73c064..ff5acad 100644
--- a/src/windows/patch_functions.cc
+++ b/src/windows/patch_functions.cc
@@ -28,7 +28,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ---
-// Author: Craig Silversteion
+// Author: Craig Silverstein
//
// The main purpose of this file is to patch the libc allocation
// routines (malloc and friends, but also _msize and other
@@ -67,6 +67,11 @@
#endif
#include "config.h"
+
+#ifdef WIN32_OVERRIDE_ALLOCATORS
+#error This file is intended for patching allocators - use override_functions.cc instead.
+#endif
+
#include <windows.h>
#include <malloc.h> // for _msize and _expand
#include <tlhelp32.h> // for CreateToolhelp32Snapshot()
@@ -476,7 +481,7 @@ void LibcInfoWithPatchFunctions<T>::Unpatch() {
}
void WindowsInfo::Patch() {
- HMODULE hkernel32 = ::GetModuleHandle("kernel32");
+ HMODULE hkernel32 = ::GetModuleHandleA("kernel32");
CHECK_NE(hkernel32, NULL);
// Unlike for libc, we know these exist in our module, so we can get
@@ -693,8 +698,10 @@ void* LibcInfoWithPatchFunctions<T>::Perftools_realloc(
(void (*)(void*))origstub_fn_[kFree]);
return NULL;
}
- return do_realloc_with_callback(old_ptr, new_size, ((void* (*)(void*, size_t))
- origstub_fn_[kRealloc]));
+ return do_realloc_with_callback(
+ old_ptr, new_size,
+ (void (*)(void*))origstub_fn_[kFree],
+ (size_t (*)(void*))origstub_fn_[k_Msize]);
}
template<int T>
@@ -772,28 +779,7 @@ void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow(
template<int T>
size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW {
- // Get the size of the old entry
- const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
- Span *span = NULL;
- size_t old_size;
- if (cl == 0) {
- span = Static::pageheap()->GetDescriptor(p);
- if (!span) {
- // This can happen on windows because some constructors may
- // construct things before tcmalloc hooks _msize().
- return ((size_t (*)(void*))origstub_fn_[k_Msize])(ptr);
- }
- cl = span->sizeclass;
- Static::pageheap()->CacheSizeClass(p, cl);
- }
- if (cl != 0) {
- old_size = Static::sizemap()->ByteSizeForClass(cl);
- } else {
- ASSERT(span != NULL);
- old_size = span->length << kPageShift;
- }
- return old_size;
+ return GetSizeWithCallback(ptr, (size_t (*)(void*))origstub_fn_[k_Msize]);
}
// We need to define this because internal windows functions like to
diff --git a/src/windows/port.cc b/src/windows/port.cc
index 7cb3a57..0f1a700 100644
--- a/src/windows/port.cc
+++ b/src/windows/port.cc
@@ -55,8 +55,7 @@ int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
return _vsnprintf(str, size-1, format, ap);
}
-// mingw defines its own snprintf, though msvc does not
-#ifndef __MINGW32__
+#ifndef HAVE_SNPRINTF
int snprintf(char *str, size_t size, const char *format, ...) {
va_list ap;
va_start(ap, format);
diff --git a/src/windows/port.h b/src/windows/port.h
index e5b9b5f..50866ec 100644
--- a/src/windows/port.h
+++ b/src/windows/port.h
@@ -49,7 +49,9 @@
#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */
+#endif
#include <windows.h>
#include <io.h> /* because we so often use open/close/etc */
#include <stdarg.h> /* for va_list */