diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2009-04-18 00:02:25 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2009-04-18 00:02:25 +0000 |
commit | beb6a9a183c1ca25c99e4401b58266ce73b8c846 (patch) | |
tree | b02a2cfe46761e177303c1dbaf420f7cfb14642f /src | |
parent | edd03a831f350bc72d76d4fad2b390d43faccb79 (diff) | |
download | gperftools-beb6a9a183c1ca25c99e4401b58266ce73b8c846.tar.gz |
Fri Apr 17 16:40:48 2009 Google Inc. <opensource@google.com>
* google-perftools: version 1.2 release
* Allow large_alloc_threshold=0 to turn it off entirely (csilvers)
* Die more helpfully when out of memory for internal data (csilvers)
* Refactor profile-data gathering, add a new unittest (cgd, nabeelmian)
* BUGFIX: fix rounding errors with static thread-size caches (addi)
* BUGFIX: disable hooks better when forking in leak-checker (csilvers)
* BUGFIX: fix realloc of crt pointers on windows (csilvers)
* BUGFIX: do a better job of finding binaries in .sh tests (csilvers)
* WINDOWS: allow overriding malloc/etc instead of patching (mbelshe)
* PORTING: fix compilation error in a ppc-specific file (csilvers)
* PORTING: deal with quirks in cygwin's /proc/self/maps (csilvers)
* PORTING: use 'A' version of functions for ascii input (mbelshe)
* PORTING: generate .so's on cygwin and mingw (ajenjo)
* PORTING: disable profiler methods on cygwin (jperkins)
* Updated autoconf version to 2.61 and libtool version to 1.5.26
git-svn-id: http://gperftools.googlecode.com/svn/trunk@68 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
38 files changed, 1931 insertions, 567 deletions
diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h index dcf143e..d8e23fe 100644 --- a/src/base/atomicops-internals-linuxppc.h +++ b/src/base/atomicops-internals-linuxppc.h @@ -407,9 +407,4 @@ inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { } // namespace base::subtle } // namespace base -// NOTE(vchen): The following is also deprecated. New callers should use -// the base::subtle namespace. -inline void MemoryBarrier() { - base::subtle::MemoryBarrier(); -} #endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/src/base/logging.cc b/src/base/logging.cc index 2f56fce..a68401c 100644 --- a/src/base/logging.cc +++ b/src/base/logging.cc @@ -39,7 +39,7 @@ DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0), "--verbose == -4 means we log fatal errors only."); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) // While windows does have a POSIX-compatible API // (_open/_write/_close), it acquires memory. Using this lower-level @@ -49,8 +49,8 @@ RawFD RawOpenForWriting(const char* filename) { // that ever becomes a problem then we ought to compute the absolute // path on its behalf (perhaps the ntdll/kernel function isn't aware // of the working directory?) 
- RawFD fd = CreateFile(filename, GENERIC_WRITE, 0, NULL, - CREATE_ALWAYS, 0, NULL); + RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL, + CREATE_ALWAYS, 0, NULL); if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS) SetEndOfFile(fd); // truncate the existing file return fd; @@ -71,7 +71,7 @@ void RawClose(RawFD handle) { CloseHandle(handle); } -#else // _WIN32 +#else // _WIN32 || __CYGWIN__ || __CYGWIN32__ #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> @@ -104,4 +104,4 @@ void RawClose(RawFD fd) { NO_INTR(close(fd)); } -#endif // _WIN32 +#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__ diff --git a/src/base/logging.h b/src/base/logging.h index 77ee988..bc1a4c2 100644 --- a/src/base/logging.h +++ b/src/base/logging.h @@ -208,14 +208,14 @@ inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { // to allow even more low-level stuff in the future. // Like other "raw" routines, these functions are best effort, and // thus don't return error codes (except RawOpenForWriting()). 
-#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) #include <windows.h> typedef HANDLE RawFD; const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; #else typedef int RawFD; const RawFD kIllegalRawFD = -1; // what open returns if it fails -#endif // _WIN32 +#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) RawFD RawOpenForWriting(const char* filename); // uses default permissions void RawWrite(RawFD fd, const char* buf, size_t len); diff --git a/src/base/simple_mutex.h b/src/base/simple_mutex.h index d59f5a0..0eed34f 100644 --- a/src/base/simple_mutex.h +++ b/src/base/simple_mutex.h @@ -95,8 +95,10 @@ #if defined(NO_THREADS) typedef int MutexType; // to keep a lock-count -#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__) -# define WIN32_LEAN_AND_MEAN // We only need minimal includes +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN // We only need minimal includes +# endif // We need Windows NT or later for TryEnterCriticalSection(). If you // don't need that functionality, you can remove these _WIN32_WINNT // lines, and change TryLock() to assert(0) or something. @@ -152,7 +154,7 @@ class Mutex { inline void SetIsSafe() { is_safe_ = true; } // Catch the error of writing Mutex when intending MutexLock. 
- Mutex(Mutex *ignored) {} + Mutex(Mutex* /*ignored*/) {} // Disallow "evil" constructors Mutex(const Mutex&); void operator=(const Mutex&); @@ -180,7 +182,7 @@ bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } void Mutex::ReaderLock() { assert(++mutex_ > 0); } void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } -#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__) +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) Mutex::Mutex() { InitializeCriticalSection(&mutex_); SetIsSafe(); } Mutex::~Mutex() { DeleteCriticalSection(&mutex_); } @@ -206,7 +208,8 @@ Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy); } void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } bool Mutex::TryLock() { return is_safe_ ? - pthread_rwlock_trywrlock(&mutex_) == 0 : true; } + pthread_rwlock_trywrlock(&mutex_) == 0 : + true; } void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } #undef SAFE_PTHREAD diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index 1f542ae..a6bd3a0 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -31,6 +31,10 @@ // Author: Mike Burrows #include "config.h" +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define OS_WINDOWS 1 +#endif + #include <stdlib.h> // for getenv() #include <stdio.h> // for snprintf(), sscanf() #include <string.h> // for memmove(), memchr(), etc. 
@@ -48,7 +52,7 @@ #include <sys/sysctl.h> #elif defined __sun__ // Solaris #include <procfs.h> // for, e.g., prmap_t -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) #include <process.h> // for getpid() (actually, _getpid()) #include <shlwapi.h> // for SHGetValueA() #include <tlhelp32.h> // for Module32First() @@ -58,7 +62,7 @@ #include "base/logging.h" #include "base/cycleclock.h" -#ifdef _WIN32 +#ifdef OS_WINDOWS #ifdef MODULEENTRY32 // In a change from the usual W-A pattern, there is no A variant of // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. @@ -75,7 +79,7 @@ #ifndef TH32CS_SNAPMODULE32 #define TH32CS_SNAPMODULE32 0 #endif /* TH32CS_SNAPMODULE32 */ -#endif /* _WIN32 */ +#endif /* OS_WINDOWS */ // Re-run fn until it doesn't cause EINTR. #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) @@ -177,7 +181,7 @@ static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous static int cpuinfo_num_cpus = 1; // Conservative guess static void SleepForMilliseconds(int milliseconds) { -#ifdef _WIN32 +#ifdef OS_WINDOWS _sleep(milliseconds); // Windows's _sleep takes milliseconds argument #else // Sleep for a few milliseconds @@ -334,7 +338,7 @@ static void InitializeSystemInfo() { } // TODO(csilvers): also figure out cpuinfo_num_cpus -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) # pragma comment(lib, "shlwapi.lib") // for SHGetValue() // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. 
@@ -410,7 +414,7 @@ bool HasPosixThreads() { if (confstr(_CS_GNU_LIBPTHREAD_VERSION, buf, sizeof(buf)) == 0) return false; return strncmp(buf, "NPTL", 4) == 0; -#elif defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#elif defined(OS_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__) return false; #else // other OS return true; // Assume that everything else has Posix @@ -492,7 +496,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, #elif defined(__MACH__) current_image_ = _dyld_image_count(); // count down from the top current_load_cmd_ = -1; -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, GetCurrentProcessId()); @@ -504,7 +508,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, } ProcMapsIterator::~ProcMapsIterator() { -#if defined(_WIN32) || defined(__MINGW32__) +#if defined(OS_WINDOWS) if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); #elif defined(__MACH__) // no cleanup necessary! @@ -515,7 +519,7 @@ ProcMapsIterator::~ProcMapsIterator() { } bool ProcMapsIterator::Valid() const { -#if defined(_WIN32) || defined(__MINGW32__) +#if defined(OS_WINDOWS) return snapshot_ != INVALID_HANDLE_VALUE; #elif defined(__MACH__) return 1; @@ -579,7 +583,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, int64 tmpinode; int major, minor; unsigned filename_offset = 0; -#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#if defined(__linux__) // for now, assume all linuxes have the same format if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n", start ? start : &tmpstart, @@ -588,6 +592,24 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, offset ? offset : &tmpoffset, &major, &minor, inode ? 
inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__CYGWIN__) || defined(__CYGWIN32__) + // cygwin is like linux, except the third field is the "entry point" + // rather than the offset (see format_process_maps at + // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src + // Offset is always be 0 on cygwin: cygwin implements an mmap + // by loading the whole file and then calling NtMapViewOfSection. + // Cygwin also seems to set its flags kinda randomly; use windows default. + char tmpflags[5]; + if (offset) + *offset = 0; + strcpy(flags_, "r-xp"); + if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? start : &tmpstart, + end ? end : &tmpend, + tmpflags, + &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset) != 7) continue; #elif defined(__FreeBSD__) // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup tmpstart = tmpend = tmpoffset = 0; @@ -722,7 +744,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // If we get here, no more load_cmd's in this image talk about // segments. Go on to the next image. 
} -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) static char kDefaultPerms[5] = "r-xp"; BOOL ok; if (module_.dwSize == 0) { // only possible before first call diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h index 86d998c..fb276eb 100644 --- a/src/base/sysinfo.h +++ b/src/base/sysinfo.h @@ -39,7 +39,7 @@ #include "config.h" #include <time.h> -#if defined(_WIN32) || defined(__MINGW32__) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) #include <windows.h> // for DWORD #include <TlHelp32.h> // for CreateToolhelp32Snapshot #endif @@ -190,7 +190,7 @@ class ProcMapsIterator { char *etext_; // end of text char *nextline_; // start of next line char *ebuf_; // end of buffer (1 char for a nul) -#if defined(_WIN32) || defined(__MINGW32__) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) HANDLE snapshot_; // filehandle on dll info // In a change from the usual W-A pattern, there is no A variant of // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. diff --git a/src/config.h.in b/src/config.h.in index d225d49..bfac21c 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -150,9 +150,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <windows.h> header file. */ -#undef HAVE_WINDOWS_H - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ @@ -165,6 +162,9 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + /* Name of package */ #undef PACKAGE diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index 89a2512..acedd46 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -120,41 +120,22 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // has been called at least once). 
~HeapLeakChecker(); - // Return true iff the heap does not have more objects allocated - // w.r.t. its state at the time of our construction. - // This does full pprof heap change checking and reporting. - // To detect tricky leaks it depends on correct working pprof implementation - // referred by FLAGS_heap_profile_pprof. - // (By 'tricky leaks' we mean a change of heap state that e.g. for SameHeap - // preserves the number of allocated objects and bytes - // -- see TestHeapLeakCheckerTrick in heap-checker_unittest.cc -- - // and thus is not detected by BriefNoLeaks.) - // CAVEAT: pprof will do no checking over stripped binaries - // (our automatic test binaries are stripped) - // NOTE: All *NoLeaks() and *SameHeap() methods can be called many times - // to check for leaks at different end-points in program's execution. - bool NoLeaks() { return DoNoLeaks(NO_LEAKS, USE_PPROF, PPROF_REPORT); } - - // Return true iff the heap does not seem to have more objects allocated - // w.r.t. its state at the time of our construction - // by looking at the number of objects & bytes allocated. - // This also tries to do pprof reporting of detected leaks. - bool QuickNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, PPROF_REPORT); } - - // Return true iff the heap does not seem to have more objects allocated - // w.r.t. its state at the time of our construction - // by looking at the number of objects & bytes allocated. - // This does not try to use pprof at all. - bool BriefNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, NO_REPORT); } - - // These are similar to their *NoLeaks counterparts, - // but they in addition require no negative leaks, - // i.e. the state of the heap must be exactly the same - // as at the time of our construction. 
- bool SameHeap() { return DoNoLeaks(SAME_HEAP, USE_PPROF, PPROF_REPORT); } - bool QuickSameHeap() - { return DoNoLeaks(SAME_HEAP, USE_COUNTS, PPROF_REPORT); } - bool BriefSameHeap() { return DoNoLeaks(SAME_HEAP, USE_COUNTS, NO_REPORT); } + // These used to be different but are all the same now: they return + // true iff all memory allocated since this HeapLeakChecker object + // was constructor is still reachable from global state. + // + // Because we fork to convert addresses to symbol-names, and forking + // is not thread-safe, and we may be called in a threaded context, + // we do not try to symbolize addresses when called manually. + bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } + + // These forms are obsolete; use NoLeaks() instead. + // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED. + bool QuickNoLeaks() { return NoLeaks(); } + bool BriefNoLeaks() { return NoLeaks(); } + bool SameHeap() { return NoLeaks(); } + bool QuickSameHeap() { return NoLeaks(); } + bool BriefSameHeap() { return NoLeaks(); } // Detailed information about the number of leaked bytes and objects // (both of these can be negative as well). @@ -231,15 +212,10 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // Helper for constructors void Create(const char *name, bool make_start_snapshot); - // Types for DoNoLeaks and its helpers. - enum CheckType { SAME_HEAP, NO_LEAKS }; - enum CheckFullness { USE_PPROF, USE_COUNTS }; - enum ReportMode { PPROF_REPORT, NO_REPORT }; + enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE }; // Helper for *NoLeaks and *SameHeap - bool DoNoLeaks(CheckType check_type, - CheckFullness fullness, - ReportMode report_mode); + bool DoNoLeaks(ShouldSymbolize should_symbolize); // These used to be public, but they are now deprecated. // Will remove entirely when all internal uses are fixed. 
diff --git a/src/google/profiler.h b/src/google/profiler.h index be7dbf3..74b936f 100644 --- a/src/google/profiler.h +++ b/src/google/profiler.h @@ -146,9 +146,7 @@ PERFTOOLS_DLL_DECL void ProfilerDisable(); /* Returns nonzero if profile is currently enabled, zero if it's not. */ PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(); -/* Routine for registering new threads with the profiler. This routine - * is called by the Thread module in google3/thread whenever a new - * thread is created. +/* Routine for registering new threads with the profiler. */ PERFTOOLS_DLL_DECL void ProfilerRegisterThread(); diff --git a/src/heap-checker.cc b/src/heap-checker.cc index ef37df2..4c446c1 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -124,9 +124,7 @@ DEFINE_string(heap_check, " or the empty string are the supported choices. " "(See HeapLeakChecker::InternalInitStart for details.)"); -DEFINE_bool(heap_check_report, - EnvToBool("HEAP_CHECK_REPORT", true), - "If overall heap check should report the found leaks via pprof"); +DEFINE_bool(heap_check_report, true, "Obsolete"); DEFINE_bool(heap_check_before_constructors, true, @@ -137,13 +135,7 @@ DEFINE_bool(heap_check_after_destructors, "If overall heap check is to end after global destructors " "or right after all REGISTER_HEAPCHECK_CLEANUP's"); -DEFINE_bool(heap_check_strict_check, - EnvToBool("HEAP_CHECK_STRICT_CHECK", true), - "If overall heap check is to be done " - "via HeapLeakChecker::*SameHeap " - "or HeapLeakChecker::*NoLeaks call"); - // heap_check_strict_check == false - // is useful only when heap_check_before_constructors == false +DEFINE_bool(heap_check_strict_check, true, "Obsolete"); DEFINE_bool(heap_check_ignore_global_live, EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true), @@ -264,6 +256,9 @@ static const int heap_checker_info_level = 0; // The larger it can be, the lesser is the chance of missing real leaks. 
static const size_t kPointerSourceAlignment = sizeof(void*); +// Cancel our InitialMallocHook_* if present. +static void CancelInitialMallocHooks(); // defined below + //---------------------------------------------------------------------- // HeapLeakChecker's own memory allocator that is // independent of the normal program allocator. @@ -573,11 +568,13 @@ enum StackDirection { // Determine which way the stack grows: -static StackDirection ATTRIBUTE_NOINLINE GetStackDirection() { - if (__builtin_frame_address(0) > __builtin_frame_address(1)) - return GROWS_TOWARDS_HIGH_ADDRESSES; - if (__builtin_frame_address(0) < __builtin_frame_address(1)) +static StackDirection ATTRIBUTE_NOINLINE GetStackDirection( + const uintptr_t *const ptr) { + uintptr_t x; + if (&x < ptr) return GROWS_TOWARDS_LOW_ADDRESSES; + if (ptr < &x) + return GROWS_TOWARDS_HIGH_ADDRESSES; RAW_CHECK(0, ""); // Couldn't determine the stack direction. @@ -597,7 +594,7 @@ static void RegisterStackLocked(const void* top_ptr) { // make sure stack_direction is initialized if (stack_direction == UNKNOWN_DIRECTION) { - stack_direction = GetStackDirection(); + stack_direction = GetStackDirection(&top); } // Find memory region with this stack @@ -1454,7 +1451,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { //---------------------------------------------------------------------- char* HeapLeakChecker::MakeProfileNameLocked() { - RAW_DCHECK(lock_.IsHeld(), ""); + RAW_DCHECK(lock_->IsHeld(), ""); RAW_DCHECK(heap_checker_lock.IsHeld(), ""); const int len = profile_name_prefix->size() + strlen(name_) + 5 + strlen(HeapProfileTable::kFileExt) + 1; @@ -1596,14 +1593,23 @@ static void SuggestPprofCommand(const char* pprof_file_arg) { ); } -bool HeapLeakChecker::DoNoLeaks(CheckType check_type, - CheckFullness fullness, - ReportMode report_mode) { +bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { SpinLockHolder l(lock_); // The locking also helps us keep the messages // for the two checks 
close together. SpinLockHolder al(&alignment_checker_lock); + // thread-safe: protected by alignment_checker_lock + static bool have_disabled_hooks_for_symbolize = false; + // Once we've checked for leaks and symbolized the results once, it's + // not safe to do it again. This is because in order to symbolize + // safely, we had to disable all the malloc hooks here, so we no + // longer can be confident we've collected all the data we need. + if (have_disabled_hooks_for_symbolize) { + RAW_LOG(FATAL, "Must not call heap leak checker manually after " + " program-exit's automatic check."); + } + HeapProfileTable::Snapshot* leaks = NULL; char* pprof_file = NULL; @@ -1709,7 +1715,20 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type, int64(stats.allocs - stats.frees), int64(stats.alloc_size - stats.free_size)); } else { - leaks->ReportLeaks(name_, pprof_file); + if (should_symbolize == SYMBOLIZE) { + // To turn addresses into symbols, we need to fork, which is a + // problem if both parent and child end up trying to call the + // same malloc-hooks we've set up, at the same time. To avoid + // trouble, we turn off the hooks before symbolizing. Note that + // this makes it unsafe to ever leak-report again! Luckily, we + // typically only want to report once in a program's run, at the + // very end. 
+ CancelInitialMallocHooks(); + have_disabled_hooks_for_symbolize = true; + leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize + } else { + leaks->ReportLeaks(name_, pprof_file, false); + } if (FLAGS_heap_check_identify_leaks) { leaks->ReportIndividualObjects(); } @@ -1854,7 +1873,6 @@ static bool internal_init_start_has_run = false; // (ignore more) FLAGS_heap_check_after_destructors = false; // to after cleanup // (most data is live) - FLAGS_heap_check_strict_check = false; // < profile check (ignore more) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "normal") { @@ -1862,7 +1880,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = false; // to after cleanup // (most data is live) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "strict") { @@ -1871,7 +1888,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = true; // to after destructors // (less data live) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "draconian") { @@ -1879,7 +1895,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = true; // to after destructors // (need them) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = false; // no live flood 
(stricter) FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter) } else if (FLAGS_heap_check == "as-is") { @@ -1983,6 +1998,7 @@ bool HeapLeakChecker::DoMainHeapCheck() { RAW_DCHECK(heap_checker_pid == getpid(), ""); do_main_heap_check = false; // will do it now; no need to do it more } + if (!NoGlobalLeaks()) { if (FLAGS_heap_check_identify_leaks) { RAW_LOG(FATAL, "Whole-program memory leaks found."); @@ -2005,15 +2021,14 @@ bool HeapLeakChecker::NoGlobalLeaks() { // we never delete or change main_heap_checker once it's set: HeapLeakChecker* main_hc = GlobalChecker(); if (main_hc) { - CheckType check_type = FLAGS_heap_check_strict_check ? SAME_HEAP : NO_LEAKS; - if (FLAGS_heap_check_before_constructors) check_type = SAME_HEAP; - // NO_LEAKS here just would make it slower in this case - // (we don't use the starting profile anyway) - CheckFullness fullness = check_type == NO_LEAKS ? USE_PPROF : USE_COUNTS; - // use pprof if it can help ignore false leaks - ReportMode report_mode = FLAGS_heap_check_report ? PPROF_REPORT : NO_REPORT; RAW_VLOG(1, "Checking for whole-program memory leaks"); - return main_hc->DoNoLeaks(check_type, fullness, report_mode); + // The program is over, so it's safe to symbolize addresses (which + // requires a fork) because no serious work is expected to be done + // after this. Symbolizing is really useful -- knowing what + // function has a leak is better than knowing just an address -- + // and while we can only safely symbolize once in a program run, + // now is the time (after all, there's no "later" that would be better). + return main_hc->DoNoLeaks(SYMBOLIZE); } return true; } @@ -2034,9 +2049,6 @@ void HeapLeakChecker::CancelGlobalCheck() { static bool in_initial_malloc_hook = false; -// Cancel our InitialMallocHook_* if present. 
-static void CancelInitialMallocHooks(); // defined below - #ifdef HAVE___ATTRIBUTE__ // we need __attribute__((weak)) for this to work #define INSTALLED_INITIAL_MALLOC_HOOKS diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index aaa4a2f..4d0ad8b 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -306,7 +306,7 @@ int HeapProfileTable::UnparseBucket(const Bucket& b, return buflen; } -HeapProfileTable::Bucket** +HeapProfileTable::Bucket** HeapProfileTable::MakeSortedBucketList() const { Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); @@ -602,7 +602,8 @@ static bool Symbolize(void *pc, char *out, int out_size) { } void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, - const char* filename) { + const char* filename, + bool should_symbolize) { // This is only used by the heap leak checker, but is intimately // tied to the allocation map that belongs in this module and is // therefore placed here. @@ -644,7 +645,8 @@ void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, for (int j = 0; j < e.bucket->depth; j++) { const void* pc = e.bucket->stack[j]; const char* sym; - if (Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) { + if (should_symbolize && + Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) { sym = sym_buffer; } else { sym = ""; diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index acbe14b..92d237e 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -335,9 +335,12 @@ class HeapProfileTable::Snapshot { // Report anything in this snapshot as a leak. // May use new/delete for temporary storage. + // If should_symbolize is true, will fork (which is not threadsafe) + // to turn addresses into symbol names. Set to false for maximum safety. // Also writes a heap profile to "filename" that contains // all of the objects in this snapshot. 
- void ReportLeaks(const char* checker_name, const char* filename); + void ReportLeaks(const char* checker_name, const char* filename, + bool should_symbolize); // Report the addresses of all leaked objects. // May use new/delete for temporary storage. diff --git a/src/page_heap_allocator.h b/src/page_heap_allocator.h index 1911bc5..20e1ab1 100644 --- a/src/page_heap_allocator.h +++ b/src/page_heap_allocator.h @@ -63,7 +63,11 @@ class PageHeapAllocator { if (free_avail_ < kAlignedSize) { // Need more room free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); - CHECK_CONDITION(free_area_ != NULL); + if (free_area_ == NULL) { + CRASH("FATAL ERROR: Out of memory trying to allocate internal " + "tcmalloc data (%d bytes, object-size %d)\n", + kAllocIncrement, static_cast<int>(sizeof(T))); + } free_avail_ = kAllocIncrement; } result = free_area_; @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.1"; +my $PPROF_VERSION = "1.2"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -649,6 +649,7 @@ sub InteractiveMode { while (1) { print "(pprof) "; $_ = <STDIN>; + last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines # Save some flags that might be reset by InteractiveCommand() @@ -919,7 +920,7 @@ sub PrintCallgrind { map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; [$_, $1, $2] } keys %$calls ) { - my $count = $calls->{$call}; + my $count = int($calls->{$call}); $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = @@ -1714,6 +1715,22 @@ sub IsSecondPcAlwaysTheSame { return $second_pc; } +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. 
+ my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + # Extracts a graph of calls. sub ExtractCalls { my $symbols = shift; @@ -1722,20 +1739,13 @@ sub ExtractCalls { my $calls = {}; while( my ($stack_trace, $count) = each %$profile ) { my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); for (my $i = 1; $i <= $#address; $i++) { - # TODO(csilvers): what should we do if $addresses[$i-1] doesn't exist? - if (exists $symbols->{$address[$i]}) { - my $source = $symbols->{$address[$i]}->[1] . ":" . - $symbols->{$address[$i]}->[0]; - my $destination = $symbols->{$address[$i-1]}->[1] . ":" . - $symbols->{$address[$i-1]}->[0]; - my $call = "$source -> $destination"; - AddEntry($calls, $call, $count); - - if ($i == 1) { - AddEntry($calls, $destination, $count); - } - } + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; } } @@ -2938,7 +2948,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|exe)(\.\d+)*\w*)/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib)(\.\d+)*\w*)/i) { # Full line from /proc/self/maps. 
Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -3250,6 +3260,7 @@ sub GetLineNumbersViaAddr2Line { $count++; } close(SYMBOLS); + return $count; } sub GetLineNumbers { @@ -3268,20 +3279,15 @@ sub GetLineNumbers { # Pass to addr2line my $addr2line = $obj_tool_map{"addr2line"}; - GetLineNumbersViaAddr2Line("$addr2line -f -C -e $image", - $pclist, - $symbols); - - # If the executable is an MS Windows PDB-format executable, we'll - # have set up obj_tool_map{"addr2line_pdb"}. In this case, we - # actually want to use both unix addr2line and windows-specific - # addr2line_pdb, since PDB-format executables can apparently include - # dwarf .o files. + my @addr2line_commands = ("$addr2line -f -C -e $image"); if (exists $obj_tool_map{"addr2line_pdb"}) { my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"}; - GetLineNumbersViaAddr2Line("$addr2line_pdb --demangle -f -C -e $image", - $pclist, - $symbols); + push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image"); + } + foreach my $addr2line_command (@addr2line_commands) { + if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) { + last; + } } } @@ -3550,16 +3556,33 @@ sub GetProcedureBoundaries { my $cppfilt = $obj_tool_map{"c++filt"}; # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm - # binary doesn't support --demangle. For the first, we try with -D - # to at least get *exported* symbols. For the second, we use c++filt - # instead of --demangle. (c++filt is less reliable though, because it - # might interpret nm meta-data as c++ symbols and try to demangle it :-/) - my @nm_commands = ("$nm -n --demangle $image 2>/dev/null", - "$nm -n $image 2>&1 | $cppfilt", - "$nm -D -n --demangle $image 2>/dev/null", - "$nm -D -n $image 2>&1 | $cppfilt", - "$nm -n $image 2>/dev/null", - "$nm -D -n $image 2>/dev/null"); + # binary doesn't support --demangle. 
In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm --demangle <foo>" + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm <foo> | c++filt" + $cppfilt_flag = " | $cppfilt"; + }; + my $flatten_flag = ""; + if (system("$nm -f $image >/dev/null 2>&1") == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). In this case, we actually # want to use both unix nm and windows-specific nm_pdb, since diff --git a/src/profile-handler.cc b/src/profile-handler.cc new file mode 100644 index 0000000..0a9f54c --- /dev/null +++ b/src/profile-handler.cc @@ -0,0 +1,498 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Nabeel Mian +// +// Implements management of profile timers and the corresponding signal handler. + +#include "config.h" +#include "profile-handler.h" + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +#include <stdio.h> +#include <errno.h> +#include <sys/time.h> + +#include <list> +#include <string> + +#include "base/dynamic_annotations.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_threads.h" + +using std::list; +using std::string; + +// This structure is used by ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback as a handle to a registered callback. +struct ProfileHandlerToken { + // Sets the callback and associated arg. 
+ ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg) + : callback(cb), + callback_arg(cb_arg) { + } + + // Callback function to be invoked on receiving a profile timer interrupt. + ProfileHandlerCallback callback; + // Argument for the callback function. + void* callback_arg; +}; + +// This class manages profile timers and associated signal handler. This is a +// a singleton. +class ProfileHandler { + public: + // Registers the current thread with the profile handler. On systems which + // have a separate interval timer for each thread, this function starts the + // timer for the current thread. + // + // The function also attempts to determine whether or not timers are shared by + // all threads in the process. (With LinuxThreads, and with NPTL on some + // Linux kernel versions, each thread has separate timers.) + // + // Prior to determining whether timers are shared, this function will + // unconditionally start the timer. However, if this function determines + // that timers are shared, then it will stop the timer if no callbacks are + // currently registered. + void RegisterThread(); + + // Registers a callback routine to receive profile timer ticks. The returned + // token is to be used when unregistering this callback and must not be + // deleted by the caller. Registration of the first callback enables the + // SIGPROF handler. + ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, + void* callback_arg); + + // Unregisters a previously registered callback. Expects the token returned + // by the corresponding RegisterCallback routine. Unregistering the last + // callback disables the SIGPROF handler. + void UnregisterCallback(ProfileHandlerToken* token); + + // Unregisters all the callbacks, stops the timer if shared, disables the + // SIGPROF handler and clears the timer_sharing_ state. + void Reset(); + + // Gets the current state of profile handler. 
+ void GetState(ProfileHandlerState* state); + + // Initializes and returns the ProfileHandler singleton. + static ProfileHandler* Instance(); + + private: + ProfileHandler(); + ~ProfileHandler(); + + // Largest allowed frequency. + static const int32 kMaxFrequency = 4000; + // Default frequency. + static const int32 kDefaultFrequency = 100; + + // ProfileHandler singleton. + static ProfileHandler* instance_; + + // pthread_once_t for one time initialization of ProfileHandler singleton. + static pthread_once_t once_; + + // Initializes the ProfileHandler singleton via GoogleOnceInit. + static void Init(); + + // Counts the number of SIGPROF interrupts received. + int64 interrupts_ GUARDED_BY(signal_lock_); + + // SIGPROF interrupt frequency, read-only after construction. + int32 frequency_; + + // Counts the number of callbacks registered. + int32 callback_count_ GUARDED_BY(control_lock_); + + // Whether or not the threading system provides interval timers that are + // shared by all threads in a process. + enum { + // No timer initialization attempted yet. + TIMERS_UNTOUCHED, + // First thread has registered and set timer. + TIMERS_ONE_SET, + // Timers are shared by all threads. + TIMERS_SHARED, + // Timers are separate in each thread. + TIMERS_SEPARATE + } timer_sharing_ GUARDED_BY(control_lock_); + + // This lock serializes the registration of threads and protects the + // callbacks_ list below. + // Locking order: + // In the context of a signal handler, acquire signal_lock_ to walk the + // callback list. Otherwise, acquire control_lock_, disable the signal + // handler and then acquire signal_lock_. + SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_); + SpinLock signal_lock_; + + // Holds the list of registered callbacks. We expect the list to be pretty + // small. Currently, the cpu profiler (base/profiler) and thread module + // (base/thread.h) are the only two components registering callbacks. 
+ // Following are the locking requirements for callbacks_: + // For read-write access outside the SIGPROF handler: + // - Acquire control_lock_ + // - Disable SIGPROF handler. + // - Acquire signal_lock_ + // For read-only access in the context of SIGPROF handler + // (Read-write access is *not allowed* in the SIGPROF handler) + // - Acquire signal_lock_ + // For read-only access outside SIGPROF handler: + // - Acquire control_lock_ + typedef list<ProfileHandlerToken*> CallbackList; + typedef CallbackList::iterator CallbackIterator; + CallbackList callbacks_ GUARDED_BY(signal_lock_); + + // Starts the interval timer. If the thread library shares timers between + // threads, this function starts the shared timer. Otherwise, this will start + // the timer in the current thread. + void StartTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Stops the interval timer. If the thread library shares timers between + // threads, this fucntion stops the shared timer. Otherwise, this will stop + // the timer in the current thread. + void StopTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Returns true if the profile interval timer is enabled in the current + // thread. This actually checks the kernel's interval timer setting. (It is + // used to detect whether timers are shared or separate.) + bool IsTimerRunning() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Sets the timer interrupt signal handler. + void EnableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Disables (ignores) the timer interrupt signal. + void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // SIGPROF handler. Iterate over and call all the registered callbacks. 
+ static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); + + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandler); +}; + +ProfileHandler* ProfileHandler::instance_ = NULL; +pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT; + +const int32 ProfileHandler::kMaxFrequency; +const int32 ProfileHandler::kDefaultFrequency; + +// If we are LD_PRELOAD-ed against a non-pthreads app, then +// pthread_once won't be defined. We declare it here, for that +// case (with weak linkage) which will cause the non-definition to +// resolve to NULL. We can then check for NULL or not in Instance. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif +extern "C" int pthread_once(pthread_once_t *, void (*)(void)) + __THROW ATTRIBUTE_WEAK; + +void ProfileHandler::Init() { + instance_ = new ProfileHandler(); +} + +ProfileHandler* ProfileHandler::Instance() { + if (pthread_once) { + pthread_once(&once_, Init); + } + if (instance_ == NULL) { + // This will be true on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + Init(); + assert(instance_ != NULL); + } + return instance_; +} + +ProfileHandler::ProfileHandler() + : interrupts_(0), + callback_count_(0), + timer_sharing_(TIMERS_UNTOUCHED) { + SpinLockHolder cl(&control_lock_); + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("CPUPROFILE_FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + // Ignore signals until we decide to turn profiling on. (Paranoia; + // should already be ignored.) 
+ DisableHandler(); +} + +ProfileHandler::~ProfileHandler() { + Reset(); +} + +void ProfileHandler::RegisterThread() { + SpinLockHolder cl(&control_lock_); + + // We try to detect whether timers are being shared by setting a + // timer in the first call to this function, then checking whether + // it's set in the second call. + // + // Note that this detection method requires that the first two calls + // to RegisterThread must be made from different threads. (Subsequent + // calls will see timer_sharing_ set to either TIMERS_SEPARATE or + // TIMERS_SHARED, and won't try to detect the timer sharing type.) + // + // Also note that if timer settings were inherited across new thread + // creation but *not* shared, this approach wouldn't work. That's + // not an issue for any Linux threading implementation, and should + // not be a problem for a POSIX-compliant threads implementation. + switch (timer_sharing_) { + case TIMERS_UNTOUCHED: + StartTimer(); + timer_sharing_ = TIMERS_ONE_SET; + break; + case TIMERS_ONE_SET: + // If the timer is running, that means that the main thread's + // timer setup is seen in this (second) thread -- and therefore + // that timers are shared. + if (IsTimerRunning()) { + timer_sharing_ = TIMERS_SHARED; + // If callback is already registered, we have to keep the timer + // running. If not, we disable the timer here. + if (callback_count_ == 0) { + StopTimer(); + } + } else { + timer_sharing_ = TIMERS_SEPARATE; + StartTimer(); + } + break; + case TIMERS_SHARED: + // Nothing needed. + break; + case TIMERS_SEPARATE: + StartTimer(); + break; + } +} + +ProfileHandlerToken* ProfileHandler::RegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg); + + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + callbacks_.push_back(token); + } + // Start the timer if timer is shared and this is a first callback. 
+ if ((callback_count_ == 0) && (timer_sharing_ == TIMERS_SHARED)) { + StartTimer(); + } + ++callback_count_; + EnableHandler(); + return token; +} + +void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) { + SpinLockHolder cl(&control_lock_); + for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end(); + ++it) { + if ((*it) == token) { + RAW_CHECK(callback_count_ > 0, "Invalid callback count"); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + delete *it; + callbacks_.erase(it); + } + --callback_count_; + if (callback_count_ > 0) { + EnableHandler(); + } else if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + return; + } + } + // Unknown token. + RAW_LOG(FATAL, "Invalid token"); +} + +void ProfileHandler::Reset() { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + CallbackIterator it = callbacks_.begin(); + while (it != callbacks_.end()) { + CallbackIterator tmp = it; + ++it; + delete *tmp; + callbacks_.erase(tmp); + } + } + callback_count_ = 0; + if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + timer_sharing_ = TIMERS_UNTOUCHED; +} + +void ProfileHandler::GetState(ProfileHandlerState* state) { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); // Protects interrupts_. 
+ state->interrupts = interrupts_; + } + if (callback_count_ > 0) { + EnableHandler(); + } + state->frequency = frequency_; + state->callback_count = callback_count_; +} + +void ProfileHandler::StartTimer() { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1000000 / frequency_; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); +} + +void ProfileHandler::StopTimer() { + struct itimerval timer; + memset(&timer, 0, sizeof timer); + setitimer(ITIMER_PROF, &timer, 0); +} + +bool ProfileHandler::IsTimerRunning() { + struct itimerval current_timer; + RAW_CHECK(0 == getitimer(ITIMER_PROF, ¤t_timer), "getitimer"); + return (current_timer.it_value.tv_sec != 0 || + current_timer.it_value.tv_usec != 0); +} + +void ProfileHandler::EnableHandler() { + struct sigaction sa; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (enable)"); +} + +void ProfileHandler::DisableHandler() { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (disable)"); +} + +void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { + int saved_errno = errno; + RAW_CHECK(instance_ != NULL, "ProfileHandler is not initialized"); + { + SpinLockHolder sl(&instance_->signal_lock_); + ++instance_->interrupts_; + for (CallbackIterator it = instance_->callbacks_.begin(); + it != instance_->callbacks_.end(); + ++it) { + (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg); + } + } + errno = saved_errno; +} + +// The sole purpose of this class is to initialize the ProfileHandler singleton +// when the global static objects are created. Note that the main thread will +// be registered at this time. 
+class ProfileHandlerInitializer { + public: + ProfileHandlerInitializer() { + ProfileHandler::Instance()->RegisterThread(); + } + + private: + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandlerInitializer); +}; +// ProfileHandlerInitializer singleton +static ProfileHandlerInitializer profile_handler_initializer; + +extern "C" void ProfileHandlerRegisterThread() { + ProfileHandler::Instance()->RegisterThread(); +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg); +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { + ProfileHandler::Instance()->UnregisterCallback(token); +} + +extern "C" void ProfileHandlerReset() { + return ProfileHandler::Instance()->Reset(); +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { + ProfileHandler::Instance()->GetState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfileHandlerRegisterThread() { +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return NULL; +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { +} + +extern "C" void ProfileHandlerReset() { +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { +} + +#endif // OS_CYGWIN diff --git a/src/profile-handler.h b/src/profile-handler.h new file mode 100644 index 0000000..1cbe253 --- /dev/null +++ b/src/profile-handler.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Nabeel Mian + * + * This module manages the cpu profile timers and the associated interrupt + * handler. When enabled, all registered threads in the program are profiled. + * (Note: if using linux 2.4 or earlier, you must use the Thread class, in + * google3/thread, to ensure all threads are profiled.) + * + * Any component interested in receiving a profile timer interrupt can do so by + * registering a callback. 
All registered callbacks must be async-signal-safe. + * + * Note: This module requires the sole ownership of ITIMER_PROF timer and the + * SIGPROF signal. + */ + +#ifndef BASE_PROFILE_HANDLER_H_ +#define BASE_PROFILE_HANDLER_H_ + +#include "config.h" +#include <signal.h> +#ifdef COMPILER_MSVC +#include "conflict-signal.h" +#endif +#include "base/basictypes.h" + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declaration. */ +struct ProfileHandlerToken; + +/* + * Callback function to be used with ProfilefHandlerRegisterCallback. This + * function will be called in the context of SIGPROF signal handler and must + * be async-signal-safe. The first three arguments are the values provided by + * the SIGPROF signal handler. We use void* to avoid using ucontext_t on + * non-POSIX systems. + * + * Requirements: + * - Callback must be async-signal-safe. + * - None of the functions in ProfileHandler are async-signal-safe. Therefore, + * callback function *must* not call any of the ProfileHandler functions. + * - Callback is not required to be re-entrant. At most one instance of + * callback can run at a time. + * + * Notes: + * - The SIGPROF signal handler saves and restores errno, so the callback + * doesn't need to. + * - Callback code *must* not acquire lock(s) to serialize access to data shared + * with the code outside the signal handler (callback must be + * async-signal-safe). If such a serialization is needed, follow the model + * used by profiler.cc: + * + * When code other than the signal handler modifies the shared data it must: + * - Acquire lock. + * - Unregister the callback with the ProfileHandler. + * - Modify shared data. + * - Re-register the callback. + * - Release lock. + * and the callback code gets a lockless, read-write access to the data. 
+ */ +typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info, + void* ucontext, void* callback_arg); + +/* + * Registers a new thread with profile handler and should be called only once + * per thread. The main thread is registered at program startup. This routine + * is called by the Thread module in google3/thread whenever a new thread is + * created. This function is not async-signal-safe. + */ +void ProfileHandlerRegisterThread(); + +/* + * Registers a callback routine. This callback function will be called in the + * context of SIGPROF handler, so must be async-signal-safe. The returned token + * is to be used when unregistering this callback via + * ProfileHandlerUnregisterCallback. Registering the first callback enables + * the SIGPROF signal handler. Caller must not free the returned token. This + * function is not async-signal-safe. + */ +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg); + +/* + * Unregisters a previously registered callback. Expects the token returned + * by the corresponding ProfileHandlerRegisterCallback and asserts that the + * passed token is valid. Unregistering the last callback disables the SIGPROF + * signal handler. It waits for the currently running callback to + * complete before returning. This function is not async-signal-safe. + */ +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token); + +/* + * FOR TESTING ONLY + * Unregisters all the callbacks, stops the timers (if shared) and disables the + * SIGPROF handler. All the threads, including the main thread, need to be + * re-registered after this call. This function is not async-signal-safe. + */ +void ProfileHandlerReset(); + +/* + * Stores profile handler's current state. This function is not + * async-signal-safe. 
+ */ +struct ProfileHandlerState { + int32 frequency; /* Profiling frequency */ + int32 callback_count; /* Number of callbacks registered */ + int64 interrupts; /* Number of interrupts received */ +}; +void ProfileHandlerGetState(struct ProfileHandlerState* state); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* BASE_PROFILE_HANDLER_H_ */ diff --git a/src/profiledata.cc b/src/profiledata.cc index e622b28..873100e 100644 --- a/src/profiledata.cc +++ b/src/profiledata.cc @@ -190,13 +190,25 @@ void ProfileData::Stop() { // Dump "/proc/self/maps" so we get list of mapped shared libraries DumpProcSelfMaps(out_); - close(out_); + Reset(); fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", count_, evictions_, total_bytes_); +} + +void ProfileData::Reset() { + if (!enabled()) { + return; + } + + // Don't reset count_, evictions_, or total_bytes_ here. They're used + // by Stop to print information about the profile after reset, and are + // cleared by Start when starting a new profile. + close(out_); delete[] hash_; hash_ = 0; delete[] evict_; evict_ = 0; + num_evicted_ = 0; free(fname_); fname_ = 0; start_time_ = 0; diff --git a/src/profiledata.h b/src/profiledata.h index 008c8a4..29bc1b7 100644 --- a/src/profiledata.h +++ b/src/profiledata.h @@ -60,11 +60,11 @@ // - 'Add' may be called from asynchronous signals, but is not // re-entrant. // -// - None of 'Start', 'Stop', 'Flush', and 'Add' may be called at the -// same time. +// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be +// called at the same time. // -// - 'Start' and 'Stop' should not be called while 'Enabled' or -// 'GetCurrent' are running, and vice versa. +// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled' +// or 'GetCurrent' are running, and vice versa. 
// // A profiler which uses asyncronous signals to add samples will // typically use two locks to protect this data structure: @@ -72,7 +72,7 @@ // - A SpinLock which is held over all calls except for the 'Add' // call made from the signal handler. // -// - A SpinLock which is held over calls to 'Start', 'Stop', +// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset', // 'Flush', and 'Add'. (This SpinLock should be acquired after // the first SpinLock in all cases where both are needed.) class ProfileData { @@ -118,6 +118,10 @@ class ProfileData { // data to disk. void Stop(); + // Stop data collection without writing anything else to disk, and + // discard any collected data. + void Reset(); + // If data collection is enabled, record a sample with 'depth' // entries from 'stack'. (depth must be > 0.) At most // kMaxStackDepth stack entries will be recorded, starting with diff --git a/src/profiler.cc b/src/profiler.cc index 8675348..c51c7b2 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -58,15 +58,13 @@ typedef int ucontext_t; // just to quiet the compiler, mostly #include "base/spinlock.h" #include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */ #include "profiledata.h" +#include "profile-handler.h" #ifdef HAVE_CONFLICT_SIGNAL_H #include "conflict-signal.h" /* used on msvc machines */ #endif using std::string; -DEFINE_string(cpu_profile, "", - "Profile file name (used if CPUPROFILE env var not specified)"); - // Collects up all profile data. This is a singleton, which is // initialized by a constructor at startup. class CpuProfiler { @@ -87,94 +85,40 @@ class CpuProfiler { void GetCurrentState(ProfilerState* state); - // Register the current thread with the profiler. This should be - // called only once per thread. - // - // The profiler attempts to determine whether or not timers are - // shared by all threads in the process. (With LinuxThreads, and - // with NPTL on some Linux kernel versions, each thread has separate - // timers.) 
- // - // On systems which have a separate interval timer for each thread, - // this function starts the timer for the current thread. Profiling - // is disabled by ignoring the resulting signals, and enabled by - // setting their handler to be prof_handler. - // - // Prior to determining whether timers are shared, this function - // will unconditionally start the timer. However, if this function - // determines that timers are shared, then it will stop the timer if - // profiling is not currently enabled. - void RegisterThread(); - static CpuProfiler instance_; private: - static const int kMaxFrequency = 4000; // Largest allowed frequency - static const int kDefaultFrequency = 100; // Default frequency - - // Sample frequency, read-only after construction. - int frequency_; - - // These locks implement the locking requirements described in the - // ProfileData documentation, specifically: - // - // control_lock_ is held all over all collector_ method calls except for - // the 'Add' call made from the signal handler, to protect against - // concurrent use of collector_'s control routines. + // This lock implements the locking requirements described in the ProfileData + // documentation, specifically: // - // signal_lock_ is held over calls to 'Start', 'Stop', 'Flush', and - // 'Add', to protect against concurrent use of data collection and - // writing routines. Code other than the signal handler must disable - // the timer signal while holding signal_lock, to prevent deadlock. - // - // Locking order is control_lock_ first, and then signal_lock_. - // signal_lock_ is acquired by the prof_handler without first - // acquiring control_lock_. - SpinLock control_lock_; - SpinLock signal_lock_; + // lock_ is held all over all collector_ method calls except for the 'Add' + // call made from the signal handler, to protect against concurrent use of + // collector_'s control routines. 
Code other than signal handler must + // unregister the signal handler before calling any collector_ method. + // 'Add' method in the collector is protected by a guarantee from + // ProfileHandle that only one instance of prof_handler can run at a time. + SpinLock lock_; ProfileData collector_; - // Filter function and its argument, if any. (NULL means include - // all samples). Set at start, read-only while running. Written - // while holding both control_lock_ and signal_lock_, read and - // executed under signal_lock_. + // Filter function and its argument, if any. (NULL means include all + // samples). Set at start, read-only while running. Written while holding + // lock_, read and executed in the context of SIGPROF interrupt. int (*filter_)(void*); void* filter_arg_; - // Whether or not the threading system provides interval timers - // that are shared by all threads in a process. - enum { - TIMERS_UNTOUCHED, // No timer initialization attempted yet. - TIMERS_ONE_SET, // First thread has registered and set timer. - TIMERS_SHARED, // Timers are shared by all threads. - TIMERS_SEPARATE // Timers are separate in each thread. - } timer_sharing_; - - // Start the interval timer used for profiling. If the thread - // library shares timers between threads, this is used to enable and - // disable the timer when starting and stopping profiling. If - // timers are not shared, this is used to enable the timer in each - // thread. - void StartTimer(); - - // Stop the interval timer used for profiling. Used only if the - // thread library shares timers between threads. - void StopTimer(); - - // Returns true if the profiling interval timer enabled in the - // current thread. This actually checks the kernel's interval timer - // setting. (It is used to detect whether timers are shared or - // separate.) - bool IsTimerRunning(); - - // Sets the timer interrupt signal handler to one that stores the pc. 
- static void EnableHandler(); - - // Disables (ignores) the timer interrupt signal. - static void DisableHandler(); - - // Signale handler that records the interrupted pc in the profile data - static void prof_handler(int sig, siginfo_t*, void* signal_ucontext); + // Opague token returned by the profile handler. To be used when calling + // ProfileHandlerUnregisterCallback. + ProfileHandlerToken* prof_handler_token_; + + // Sets up a callback to receive SIGPROF interrupt. + void EnableHandler(); + + // Disables receiving SIGPROF interrupt. + void DisableHandler(); + + // Signal handler that records the interrupted pc in the profile data. + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler); }; // Profile data structure singleton: Constructor will check to see if @@ -184,25 +128,10 @@ CpuProfiler CpuProfiler::instance_; // Initialize profiling: activated if getenv("CPUPROFILE") exists. CpuProfiler::CpuProfiler() - : timer_sharing_(TIMERS_UNTOUCHED) { - // Get frequency of interrupts (if specified) - char junk; - const char* fr = getenv("CPUPROFILE_FREQUENCY"); - if (fr != NULL && (sscanf(fr, "%d%c", &frequency_, &junk) == 1) && - (frequency_ > 0)) { - // Limit to kMaxFrequency - frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; - } else { - frequency_ = kDefaultFrequency; - } - - // Ignore signals until we decide to turn profiling on. (Paranoia; - // should already be ignored.) - DisableHandler(); - - RegisterThread(); - - // Should profiling be enabled automatically at start? + : prof_handler_token_(NULL) { + // TODO(cgd) Move this code *out* of the CpuProfile constructor into a + // separate object responsible for initialization. With ProfileHandler there + // is no need to limit the number of profilers. 
char fname[PATH_MAX]; if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { return; @@ -219,41 +148,26 @@ CpuProfiler::CpuProfiler() } } -bool CpuProfiler::Start(const char* fname, - const ProfilerOptions* options) { - SpinLockHolder cl(&control_lock_); +bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) { + SpinLockHolder cl(&lock_); if (collector_.enabled()) { return false; } - { - // spin lock really is needed to protect init here, since it's - // conceivable that prof_handler may still be running from a - // previous profiler run. (For instance, if prof_handler just - // started, had not grabbed the spinlock, then was switched out, - // it might start again right now.) Any such late sample will be - // recorded against the new profile, but there's no harm in that. - SpinLockHolder sl(&signal_lock_); - - ProfileData::Options collector_options; - collector_options.set_frequency(frequency_); - if (!collector_.Start(fname, collector_options)) { - return false; - } - - filter_ = NULL; - if (options != NULL && options->filter_in_thread != NULL) { - filter_ = options->filter_in_thread; - filter_arg_ = options->filter_in_thread_arg; - } - - // Must unlock before setting prof_handler to avoid deadlock - // with signal delivered to this thread. 
+ ProfileHandlerState prof_handler_state; + ProfileHandlerGetState(&prof_handler_state); + + ProfileData::Options collector_options; + collector_options.set_frequency(prof_handler_state.frequency); + if (!collector_.Start(fname, collector_options)) { + return false; } - if (timer_sharing_ == TIMERS_SHARED) { - StartTimer(); + filter_ = NULL; + if (options != NULL && options->filter_in_thread != NULL) { + filter_ = options->filter_in_thread; + filter_arg_ = options->filter_in_thread_arg; } // Setup handler for SIGPROF interrupts @@ -268,55 +182,48 @@ CpuProfiler::~CpuProfiler() { // Stop profiling and write out any collected profile data void CpuProfiler::Stop() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); if (!collector_.enabled()) { return; } - // Ignore timer signals. Note that the handler may have just - // started and might not have taken signal_lock_ yet. Holding - // signal_lock_ below along with the semantics of collector_.Add() - // (which does nothing if collection is not enabled) prevents that - // late sample from causing a problem. + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // stopping the collector. DisableHandler(); - if (timer_sharing_ == TIMERS_SHARED) { - StopTimer(); - } - - { - SpinLockHolder sl(&signal_lock_); - collector_.Stop(); - } + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to stop the collector. + collector_.Stop(); } void CpuProfiler::FlushTable() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); if (!collector_.enabled()) { return; } - // Disable timer signal while holding signal_lock_, to prevent deadlock - // if we take a timer signal while flushing. + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // flushing the profile data. 
DisableHandler(); - { - SpinLockHolder sl(&signal_lock_); - collector_.FlushTable(); - } + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to flush the profile data. + collector_.FlushTable(); + EnableHandler(); } bool CpuProfiler::Enabled() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); return collector_.enabled(); } void CpuProfiler::GetCurrentState(ProfilerState* state) { ProfileData::State collector_state; { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); collector_.GetCurrentState(&collector_state); } @@ -328,141 +235,56 @@ void CpuProfiler::GetCurrentState(ProfilerState* state) { state->profile_name[buf_size-1] = '\0'; } -void CpuProfiler::RegisterThread() { - SpinLockHolder cl(&control_lock_); - - // We try to detect whether timers are being shared by setting a - // timer in the first call to this function, then checking whether - // it's set in the second call. - // - // Note that this detection method requires that the first two calls - // to RegisterThread must be made from different threads. (Subsequent - // calls will see timer_sharing_ set to either TIMERS_SEPARATE or - // TIMERS_SHARED, and won't try to detect the timer sharing type.) - // - // Also note that if timer settings were inherited across new thread - // creation but *not* shared, this approach wouldn't work. That's - // not an issue for any Linux threading implementation, and should - // not be a problem for a POSIX-compliant threads implementation. - switch (timer_sharing_) { - case TIMERS_UNTOUCHED: - StartTimer(); - timer_sharing_ = TIMERS_ONE_SET; - break; - case TIMERS_ONE_SET: - // If the timer is running, that means that the main thread's - // timer setup is seen in this (second) thread -- and therefore - // that timers are shared. 
- if (IsTimerRunning()) {
- timer_sharing_ = TIMERS_SHARED;
- // If profiling has already been enabled, we have to keep the
- // timer running. If not, we disable the timer here and
- // re-enable it in start.
- if (!collector_.enabled()) {
- StopTimer();
- }
- } else {
- timer_sharing_ = TIMERS_SEPARATE;
- StartTimer();
- }
- break;
- case TIMERS_SHARED:
- // Nothing needed.
- break;
- case TIMERS_SEPARATE:
- StartTimer();
- break;
- }
-}
-
-void CpuProfiler::StartTimer() {
- // TODO: Randomize the initial interrupt value?
- // TODO: Randomize the inter-interrupt period on every interrupt?
- struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 1000000 / frequency_;
- timer.it_value = timer.it_interval;
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-void CpuProfiler::StopTimer() {
- struct itimerval timer;
- memset(&timer, 0, sizeof timer);
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-bool CpuProfiler::IsTimerRunning() {
- itimerval current_timer;
- RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer failed");
- return (current_timer.it_value.tv_sec != 0 ||
- current_timer.it_value.tv_usec != 0);
-}
-
 void CpuProfiler::EnableHandler() {
- struct sigaction sa;
- sa.sa_sigaction = prof_handler;
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered");
+ prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this);
+ RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler");
 }
 
 void CpuProfiler::DisableHandler() {
- struct sigaction sa;
- sa.sa_handler = SIG_IGN;
- sa.sa_flags = SA_RESTART;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered");
+ ProfileHandlerUnregisterCallback(prof_handler_token_);
+ prof_handler_token_ = NULL;
} -// Signal handler that records the pc in the profile-data structure -// -// NOTE: it is possible for profiling to be disabled just as this -// signal handler starts, before signal_lock_ is acquired. Therefore, -// collector_.Add must check whether profiling is enabled before -// trying to record any data. (See also comments in Start and Stop.) -void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext) { - int saved_errno = errno; - - // Hold the spin lock while we're gathering the trace because there's - // no real harm in holding it and there's little point in releasing - // and re-acquiring it. (We'll only be blocking Start, Stop, and - // Flush.) We make sure to release it before restoring errno. - { - SpinLockHolder sl(&instance_.signal_lock_); - - if (instance_.filter_ == NULL || - (*instance_.filter_)(instance_.filter_arg_)) { - void* stack[ProfileData::kMaxStackDepth]; - - // The top-most active routine doesn't show up as a normal - // frame, but as the "pc" value in the signal handler context. - stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); - - // We skip the top two stack trace entries (this function and one - // signal handler frame) since they are artifacts of profiling and - // should not be measured. Other profiling related frames may be - // removed by "pprof" at analysis time. Instead of skipping the top - // frames, we could skip nothing, but that would increase the - // profile size unnecessarily. - int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, - 2, signal_ucontext); - depth++; // To account for pc value in stack[0]; - - instance_.collector_.Add(depth, stack); - } +// Signal handler that records the pc in the profile-data structure. We do no +// synchronization here. profile-handler.cc guarantees that at most one +// instance of prof_handler() will run at a time. 
All other routines that +// access the data touched by prof_handler() disable this signal handler before +// accessing the data and therefore cannot execute concurrently with +// prof_handler(). +void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler) { + CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler); + + if (instance->filter_ == NULL || + (*instance->filter_)(instance->filter_arg_)) { + void* stack[ProfileData::kMaxStackDepth]; + + // The top-most active routine doesn't show up as a normal + // frame, but as the "pc" value in the signal handler context. + stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + + // We skip the top two stack trace entries (this function and one + // signal handler frame) since they are artifacts of profiling and + // should not be measured. Other profiling related frames may be + // removed by "pprof" at analysis time. Instead of skipping the top + // frames, we could skip nothing, but that would increase the + // profile size unnecessarily. + int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, + 2, signal_ucontext); + depth++; // To account for pc value in stack[0]; + + instance->collector_.Add(depth, stack); } - - errno = saved_errno; } +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + extern "C" void ProfilerRegisterThread() { - CpuProfiler::instance_.RegisterThread(); + ProfileHandlerRegisterThread(); } -// DEPRECATED routines -extern "C" void ProfilerEnable() { } -extern "C" void ProfilerDisable() { } - extern "C" void ProfilerFlush() { CpuProfiler::instance_.FlushTable(); } @@ -488,9 +310,27 @@ extern "C" void ProfilerGetCurrentState(ProfilerState* state) { CpuProfiler::instance_.GetCurrentState(state); } +#else // OS_CYGWIN -REGISTER_MODULE_INITIALIZER(profiler, { - if (!FLAGS_cpu_profile.empty()) { - ProfilerStart(FLAGS_cpu_profile.c_str()); - } -}); +// ITIMER_PROF doesn't work under cygwin. 
ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfilerRegisterThread() { } +extern "C" void ProfilerFlush() { } +extern "C" int ProfilingIsEnabledForAllThreads() { return 0; } +extern "C" int ProfilerStart(const char* fname) { return 0; } +extern "C" int ProfilerStartWithOptions(const char *fname, + const ProfilerOptions *options) { + return 0; +} +extern "C" void ProfilerStop() { } +extern "C" void ProfilerGetCurrentState(ProfilerState* state) { + memset(state, 0, sizeof(*state)); +} + +#endif // OS_CYGWIN + +// DEPRECATED routines +extern "C" void ProfilerEnable() { } +extern "C" void ProfilerDisable() { } diff --git a/src/sampler.cc b/src/sampler.cc index dda225c..a11b893 100755 --- a/src/sampler.cc +++ b/src/sampler.cc @@ -39,7 +39,7 @@ using std::min; -// Twice the approximate gap between sampling actions. +// The approximate gap in bytes between sampling actions. // I.e., we take one sample approximately once every // tcmalloc_sample_parameter bytes of allocation // i.e. about once every 512KB. @@ -59,9 +59,9 @@ namespace tcmalloc { // Statics for Sampler double Sampler::log_table_[1<<kFastlogNumBits]; -// Populate the lookup table for FastLog2 -// The approximates the log2 curve with a step function -// Steps have height equal to log2 of the mid-point of the step +// Populate the lookup table for FastLog2. +// This approximates the log2 curve with a step function. +// Steps have height equal to log2 of the mid-point of the step. 
void Sampler::PopulateFastLog2Table() { for (int i = 0; i < (1<<kFastlogNumBits); i++) { log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits)) @@ -101,32 +101,30 @@ void Sampler::InitStatics() { // This is done by generating a random number between 0 and 1 and applying // the inverse cumulative distribution function for an exponential. // Specifically: Let m be the inverse of the sample period, then -// p = 1 - exp(mx) -// q = exp(mx) -// log_e(q) = mx -// log_e(q)/m = x -// log_2(q) / (log_e(2) / m) = x -// The value (log_e(2) / m) is precomputed -// and may also be approximated for large sampler periods by -// 1.0 / log2(1.0-1.0/(sample_period_)); -// In the code, q is actually in the range 1 to 2**26, hence the -26 +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below size_t Sampler::PickNextSamplingPoint() { - double sample_scaling = - log(2.0) * FLAGS_tcmalloc_sample_parameter; rnd_ = NextRandom(rnd_); // Take the top 26 bits as the random number - // (This plus the 1<<26 sampling bound give a max step possible of - // 1209424308 bytes.) + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) const uint64_t prng_mod_power = 48; // Number of bits in prng // The uint32_t cast is to prevent a (hard-to-reproduce) NAN // under piii debug for some binaries. double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0; - // Put the computed p-value through the CDF of a geometric + // Put the computed p-value through the CDF of a geometric. 
// For faster performance (save ~1/20th exec time), replace - // min(FastLog2(q) - 26,0) by (Fastlog2(q) - 26.000705) + // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705) // The value 26.000705 is used rather than 26 to compensate // for inaccuracies in FastLog2 which otherwise result in a // negative answer. - return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * sample_scaling + 1); + return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0) + * FLAGS_tcmalloc_sample_parameter) + 1); } } // namespace tcmalloc diff --git a/src/stacktrace_win32-inl.h b/src/stacktrace_win32-inl.h index a717714..26ae297 100644 --- a/src/stacktrace_win32-inl.h +++ b/src/stacktrace_win32-inl.h @@ -62,7 +62,7 @@ typedef USHORT NTAPI RtlCaptureStackBackTrace_Function( // to worry about someone else holding the loader's lock. static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = (RtlCaptureStackBackTrace_Function*) - GetProcAddress(GetModuleHandle("ntdll.dll"), "RtlCaptureStackBackTrace"); + GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace"); int GetStackTrace(void** result, int max_depth, int skip_count) { if (!RtlCaptureStackBackTrace_fn) { diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h index 902806d..9f68a03 100644 --- a/src/stacktrace_x86-inl.h +++ b/src/stacktrace_x86-inl.h @@ -243,7 +243,7 @@ static void **NextStackFrame(void **old_sp, const void *uc) { // last two pages in the address space if ((uintptr_t)new_sp >= 0xffffe000) return NULL; #endif -#if !defined(_WIN32) +#ifdef HAVE_MMAP if (!STRICT_UNWINDING) { // Lax sanity checks cause a crash on AMD-based machines with // VDSO-enabled kernels. 
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index ca88b91..e5022e3 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -131,6 +131,10 @@ #include "tcmalloc_guard.h" #include "thread_cache.h" +#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) +# define WIN32_DO_PATCHING 1 +#endif + using tcmalloc::PageHeap; using tcmalloc::PageHeapAllocator; using tcmalloc::SizeMap; @@ -171,7 +175,8 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold, "messages. This bounds the amount of extra logging " "generated by this flag. Default value of this flag " "is very large and therefore you should see no extra " - "logging unless the flag is overridden."); + "logging unless the flag is overridden. Set to 0 to " + "disable reporting entirely."); // These routines are called by free(), realloc(), etc. if the pointer is // invalid. This is a cheap (source-editing required) kind of exception @@ -181,12 +186,6 @@ void InvalidFree(void* ptr) { CRASH("Attempt to free invalid pointer: %p\n", ptr); } -void* InvalidRealloc(void* old_ptr, size_t new_size) { - CRASH("Attempt to realloc invalid pointer: %p (realloc to %" PRIuS ")\n", - old_ptr, new_size); - return NULL; -} - size_t InvalidGetSizeForRealloc(void* old_ptr) { CRASH("Attempt to realloc invalid pointer: %p\n", old_ptr); return 0; @@ -492,7 +491,8 @@ TCMallocGuard::TCMallocGuard() { // Check whether the kernel also supports TLS (needs to happen at runtime) tcmalloc::CheckIfKernelSupportsTLS(); #endif -#ifdef _WIN32 // patch the windows VirtualAlloc, etc. +#ifdef WIN32_DO_PATCHING + // patch the windows VirtualAlloc, etc. 
PatchWindowsFunctions(); // defined in windows/patch_functions.cc #endif free(malloc(1)); @@ -512,7 +512,9 @@ TCMallocGuard::~TCMallocGuard() { } } } +#ifndef WIN32_OVERRIDE_ALLOCATORS static TCMallocGuard module_enter_exit_hook; +#endif //------------------------------------------------------------------- // Helpers for the exported routines below @@ -578,8 +580,8 @@ static void ReportLargeAlloc(Length num_pages, void* result) { static const int N = 1000; char buffer[N]; TCMalloc_Printer printer(buffer, N); - printer.printf("tcmalloc: large alloc %lld bytes == %p @ ", - static_cast<long long>(num_pages) << kPageShift, + printer.printf("tcmalloc: large alloc %llu bytes == %p @ ", + static_cast<unsigned long long>(num_pages) << kPageShift, result); for (int i = 0; i < stack.depth; i++) { printer.printf(" %p", stack.stack[i]); @@ -598,7 +600,7 @@ inline void* do_malloc_pages(Length num_pages) { SpinLockHolder h(Static::pageheap_lock()); span = Static::pageheap()->New(num_pages); const int64 threshold = large_alloc_threshold; - if (num_pages >= (threshold >> kPageShift)) { + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { // Increase the threshold by 1/8 every time we generate a report. // We cap the threshold at 8GB to avoid overflow problems. large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 @@ -716,7 +718,7 @@ inline size_t GetSizeWithCallback(void* ptr, return Static::sizemap()->ByteSizeForClass(cl); } else { Span *span = Static::pageheap()->GetDescriptor(p); - if (span == NULL) { // means we do now own this memory + if (span == NULL) { // means we do not own this memory return (*invalid_getsize_fn)(ptr); } else if (span->sizeclass != 0) { Static::pageheap()->CacheSizeClass(p, span->sizeclass); @@ -729,12 +731,12 @@ inline size_t GetSizeWithCallback(void* ptr, // This lets you call back to a given function pointer if ptr is invalid. // It is used primarily by windows code which wants a specialized callback. 
-inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, - void* (*invalid_realloc_fn)(void*, - size_t)) { +inline void* do_realloc_with_callback( + void* old_ptr, size_t new_size, + void (*invalid_free_fn)(void*), + size_t (*invalid_get_size_fn)(void*)) { // Get the size of the old entry - const size_t old_size = GetSizeWithCallback(old_ptr, - &InvalidGetSizeForRealloc); + const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); // Reallocate if the new size is larger than the old size, // or if the new size is significantly smaller than the old size. @@ -764,7 +766,7 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, // We could use a variant of do_free() that leverages the fact // that we already know the sizeclass of old_ptr. The benefit // would be small, so don't bother. - do_free(old_ptr); + do_free_with_callback(old_ptr, invalid_free_fn); return new_ptr; } else { // We still need to call hooks to report the updated size: @@ -775,7 +777,8 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, } inline void* do_realloc(void* old_ptr, size_t new_size) { - return do_realloc_with_callback(old_ptr, new_size, &InvalidRealloc); + return do_realloc_with_callback(old_ptr, new_size, + &InvalidFree, &InvalidGetSizeForRealloc); } // For use by exported routines below that want specific alignments @@ -893,7 +896,7 @@ static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); inline void* cpp_alloc(size_t size, bool nothrow) { for (;;) { void* p = do_malloc(size); -#ifdef PREANSINEW +#if defined(PREANSINEW) || (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) return p; #else if (p == NULL) { // allocation failed @@ -939,7 +942,7 @@ size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { // Exported routines //------------------------------------------------------------------- -#ifndef _WIN32 // windows doesn't allow overriding; use the do_* 
fns instead +#ifndef WIN32_DO_PATCHING // CAVEAT: The code structure below ensures that MallocHook methods are always // called from the stack frame of the invoked allocation function. @@ -1198,4 +1201,4 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; -#endif // #ifndef _WIN32 +#endif // #ifndef WIN32_DO_PATCHING diff --git a/src/tests/frag_unittest.cc b/src/tests/frag_unittest.cc index c257c7d..08494b4 100644 --- a/src/tests/frag_unittest.cc +++ b/src/tests/frag_unittest.cc @@ -35,10 +35,13 @@ #include "config_for_unittests.h" #include <stdlib.h> #include <stdio.h> -#ifndef _WIN32 +#ifdef HAVE_SYS_RESOURCE_H #include <sys/time.h> // for struct timeval #include <sys/resource.h> // for getrusage #endif +#ifdef _WIN32 +#include <windows.h> // for GetTickCount() +#endif #include <vector> #include "base/logging.h" #include <google/malloc_extension.h> @@ -80,12 +83,14 @@ int main(int argc, char** argv) { // Now do timing tests for (int i = 0; i < 5; i++) { static const int kIterations = 100000; -#ifdef _WIN32 - long long int tv_start = GetTickCount(); -#else +#ifdef HAVE_SYS_RESOURCE_H struct rusage r; getrusage(RUSAGE_SELF, &r); // figure out user-time spent on this struct timeval tv_start = r.ru_utime; +#elif defined(_WIN32) + long long int tv_start = GetTickCount(); +#else +# error No way to calculate time on your system #endif for (int i = 0; i < kIterations; i++) { @@ -94,16 +99,18 @@ int main(int argc, char** argv) { &s); } -#ifdef _WIN32 +#ifdef HAVE_SYS_RESOURCE_H + getrusage(RUSAGE_SELF, &r); + struct timeval tv_end = r.ru_utime; + int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec; + int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec; +#elif defined(_WIN32) long long int tv_end = GetTickCount(); int64 sumsec = (tv_end - tv_start) / 1000; // Resolution in windows is only to the millisecond, alas int64 sumusec = ((tv_end 
- tv_start) % 1000) * 1000; #else - getrusage(RUSAGE_SELF, &r); - struct timeval tv_end = r.ru_utime; - int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec; - int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec; +# error No way to calculate time on your system #endif fprintf(stderr, "getproperty: %6.1f ns/call\n", (sumsec * 1e9 + sumusec * 1e3) / kIterations); diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc new file mode 100644 index 0000000..d780aac --- /dev/null +++ b/src/tests/profile-handler_unittest.cc @@ -0,0 +1,445 @@ +// Copyright 2009 Google Inc. All Rights Reserved. +// Author: Nabeel Mian (nabeelmian@google.com) +// Chris Demetriou (cgd@google.com) +// +// This file contains the unit tests for profile-handler.h interface. + +#include "config.h" +#include "profile-handler.h" + +#include <assert.h> +#include <sys/time.h> +#include <pthread.h> +#include "base/logging.h" +#include "base/simple_mutex.h" + +// Some helpful macros for the test class +#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_EQ(a, b) CHECK_EQ(a, b) +#define EXPECT_NE(a, b) CHECK_NE(a, b) +#define EXPECT_GT(a, b) CHECK_GT(a, b) +#define EXPECT_LT(a, b) CHECK_LT(a, b) +#define EXPECT_GE(a, b) CHECK_GE(a, b) +#define EXPECT_LE(a, b) CHECK_LE(a, b) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define TEST_F(cls, fn) void cls :: fn() + +namespace { + +// TODO(csilvers): error-checking on the pthreads routines +class Thread { + public: + Thread() : joinable_(false) { } + void SetJoinable(bool value) { joinable_ = value; } + void Start() { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, joinable_ ? 
PTHREAD_CREATE_JOINABLE
+ : PTHREAD_CREATE_DETACHED);
+ pthread_create(&thread_, &attr, &DoRun, this);
+ pthread_attr_destroy(&attr);
+ }
+ void Join() {
+ assert(joinable_);
+ pthread_join(thread_, NULL);
+ }
+ virtual void Run() = 0;
+ private:
+ static void* DoRun(void* cls) {
+ ProfileHandlerRegisterThread();
+ reinterpret_cast<Thread*>(cls)->Run();
+ return NULL;
+ }
+ pthread_t thread_;
+ bool joinable_;
+};
+
+// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy
+// load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz)
+// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate
+// enough cpu usage to get a profile tick.
+int kSleepInterval = 200000;
+
+// Whether each thread has separate timers.
+static bool timer_separate_ = false;
+
+// Checks whether the profile timer is enabled for the current thread.
+bool IsTimerEnabled() {
+ itimerval current_timer;
+ EXPECT_EQ(0, getitimer(ITIMER_PROF, &current_timer));
+ return (current_timer.it_value.tv_sec != 0 ||
+ current_timer.it_value.tv_usec != 0);
+}
+
+class VirtualTimerGetterThread : public Thread {
+ public:
+ VirtualTimerGetterThread() {
+ memset(&virtual_timer_, 0, sizeof virtual_timer_);
+ }
+ struct itimerval virtual_timer_;
+
+ private:
+ void Run() {
+ CHECK_EQ(0, getitimer(ITIMER_VIRTUAL, &virtual_timer_));
+ }
+};
+
+// This function checks whether the timers are shared between thread. This
+// function spawns a thread, so use it carefully when testing thread-dependent
+// behaviour.
+static bool threads_have_separate_timers() {
+ struct itimerval new_timer_val;
+
+ // Enable the virtual timer in the current thread.
+ memset(&new_timer_val, 0, sizeof new_timer_val);
+ new_timer_val.it_value.tv_sec = 1000000; // seconds
+ CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL));
+
+ // Spawn a thread, get the virtual timer's value there.
+ VirtualTimerGetterThread thread; + thread.SetJoinable(true); + thread.Start(); + thread.Join(); + + // Disable timer here. + memset(&new_timer_val, 0, sizeof new_timer_val); + CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL)); + + bool target_timer_enabled = (thread.virtual_timer_.it_value.tv_sec != 0 || + thread.virtual_timer_.it_value.tv_usec != 0); + if (!target_timer_enabled) { + LOG(INFO, "threads have separate timers"); + return true; + } else { + LOG(INFO, "threads have shared timers"); + return false; + } +} + +// Dummy worker thread to accumulate cpu time. +class BusyThread : public Thread { + public: + BusyThread() : stop_work_(false) { + } + + // Setter/Getters + bool stop_work() { + MutexLock lock(&mu_); + return stop_work_; + } + void set_stop_work(bool stop_work) { + MutexLock lock(&mu_); + stop_work_ = stop_work; + } + + private: + // Protects stop_work_ below. + Mutex mu_; + // Whether to stop work? + bool stop_work_; + + // Do work until asked to stop. + void Run() { + while (!stop_work()) { + } + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +class NullThread : public Thread { + private: + void Run() { + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +// Signal handler which tracks the profile timer ticks. +static void TickCounter(int sig, siginfo_t* sig_info, void *vuc, + void* tick_counter) { + int* counter = static_cast<int*>(tick_counter); + ++(*counter); +} + +// This class tests the profile-handler.h interface. +class ProfileHandlerTest { + protected: + + // Determines whether threads have separate timers. + static void SetUpTestCase() { + timer_separate_ = threads_have_separate_timers(); + } + + // Sets up the profile timers and SIGPROF handler in a known state. It does + // the following: + // 1. 
Unregisters all the callbacks, stops the timer (if shared) and + // clears out timer_sharing state in the ProfileHandler. This clears + // out any state left behind by the previous test or during module + // initialization when the test program was started. + // 2. Spawns two threads which will be registered with the ProfileHandler. + // At this time ProfileHandler knows if the timers are shared. + // 3. Starts a busy worker thread to accumulate CPU usage. + virtual void SetUp() { + // Reset the state of ProfileHandler between each test. This unregisters + // all callbacks, stops timer (if shared) and clears timer sharing state. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + // ProfileHandler requires at least two threads to be registerd to determine + // whether timers are shared. + RegisterThread(); + RegisterThread(); + // Now that two threads are started, verify that the signal handler is + // disabled and the timers are correctly enabled/disabled. + VerifyDisabled(); + // Start worker to accumulate cpu usage. + StartWorker(); + } + + virtual void TearDown() { + ProfileHandlerReset(); + // Stops the worker thread. + StopWorker(); + } + + // Starts a no-op thread that gets registered with the ProfileHandler. Waits + // for the thread to stop. + void RegisterThread() { + NullThread t; + t.SetJoinable(true); + t.Start(); + t.Join(); + } + + // Starts a busy worker thread to accumulate cpu time. There should be only + // one busy worker running. This is required for the case where there are + // separate timers for each thread. + void StartWorker() { + busy_worker_ = new BusyThread(); + busy_worker_->SetJoinable(true); + busy_worker_->Start(); + // Wait for worker to start up and register with the ProfileHandler. + // TODO(nabeelmian) This may not work under very heavy load. + usleep(kSleepInterval); + } + + // Stops the worker thread. 
+ void StopWorker() { + busy_worker_->set_stop_work(true); + busy_worker_->Join(); + delete busy_worker_; + } + + // Checks whether SIGPROF signal handler is enabled. + bool IsSignalEnabled() { + struct sigaction sa; + CHECK_EQ(sigaction(SIGPROF, NULL, &sa), 0); + return ((sa.sa_handler == SIG_IGN) || (sa.sa_handler == SIG_DFL)) ? + false : true; + } + + // Gets the number of callbacks registered with the ProfileHandler. + uint32 GetCallbackCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.callback_count; + } + + // Gets the current ProfileHandler interrupt count. + uint64 GetInterruptCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.interrupts; + } + + // Verifies that a callback is correctly registered and receiving + // profile ticks. + void VerifyRegistration(const int& tick_counter) { + // Check the callback count. + EXPECT_GT(GetCallbackCount(), 0); + // Check that the profile timer is enabled. + EXPECT_TRUE(IsTimerEnabled()); + // Check that the signal handler is enabled. + EXPECT_TRUE(IsSignalEnabled()); + uint64 interrupts_before = GetInterruptCount(); + // Sleep for a bit and check that tick counter is making progress. + int old_tick_count = tick_counter; + usleep(kSleepInterval); + int new_tick_count = tick_counter; + EXPECT_GT(new_tick_count, old_tick_count); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_GT(interrupts_after, interrupts_before); + } + + // Verifies that a callback is not receiving profile ticks. + void VerifyUnregistration(const int& tick_counter) { + // Sleep for a bit and check that tick counter is not making progress. + int old_tick_count = tick_counter; + usleep(kSleepInterval); + int new_tick_count = tick_counter; + EXPECT_EQ(new_tick_count, old_tick_count); + // If no callbacks, signal handler and shared timer should be disabled. 
+ if (GetCallbackCount() == 0) { + EXPECT_FALSE(IsSignalEnabled()); + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + } + } + + // Verifies that the SIGPROF interrupt handler is disabled and the timer, + // if shared, is disabled. Expects the worker to be running. + void VerifyDisabled() { + // Check that the signal handler is disabled. + EXPECT_FALSE(IsSignalEnabled()); + // Check that the callback count is 0. + EXPECT_EQ(GetCallbackCount(), 0); + // Check that the timer is disabled if shared, enabled otherwise. + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + // Verify that the ProfileHandler is not accumulating profile ticks. + uint64 interrupts_before = GetInterruptCount(); + usleep(kSleepInterval); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_EQ(interrupts_after, interrupts_before); + } + + // Busy worker thread to accumulate cpu usage. + BusyThread* busy_worker_; + + private: + // The tests to run + void RegisterUnregisterCallback(); + void MultipleCallbacks(); + void Reset(); + void RegisterCallbackBeforeThread(); + + public: +#define RUN(test) do { \ + printf("Running %s\n", #test); \ + ProfileHandlerTest pht; \ + pht.SetUp(); \ + pht.test(); \ + pht.TearDown(); \ +} while (0) + + static int RUN_ALL_TESTS() { + SetUpTestCase(); + RUN(RegisterUnregisterCallback); + RUN(MultipleCallbacks); + RUN(Reset); + RUN(RegisterCallbackBeforeThread); + printf("Done\n"); + return 0; + } +}; + +// Verifies ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback. +TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) { + int tick_count = 0; + ProfileHandlerToken* token = ProfileHandlerRegisterCallback( + TickCounter, &tick_count); + VerifyRegistration(tick_count); + ProfileHandlerUnregisterCallback(token); + VerifyUnregistration(tick_count); +} + +// Verifies that multiple callbacks can be registered. 
+TEST_F(ProfileHandlerTest, MultipleCallbacks) { + // Register first callback. + int first_tick_count; + ProfileHandlerToken* token1 = ProfileHandlerRegisterCallback( + TickCounter, &first_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. + int second_tick_count; + ProfileHandlerToken* token2 = ProfileHandlerRegisterCallback( + TickCounter, &second_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Unregister first callback. + ProfileHandlerUnregisterCallback(token1); + VerifyUnregistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + // Verify that second callback is still registered. + VerifyRegistration(second_tick_count); + + // Unregister second callback. + ProfileHandlerUnregisterCallback(token2); + VerifyUnregistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 0); + + // Verify that the signal handler and timers are correctly disabled. + VerifyDisabled(); +} + +// Verifies ProfileHandlerReset +TEST_F(ProfileHandlerTest, Reset) { + // Verify that the profile timer interrupt is disabled. + VerifyDisabled(); + int first_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &first_tick_count); + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. + int second_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &second_tick_count); + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Reset the profile handler and verify that callback were correctly + // unregistered and timer/signal are disabled. 
+ ProfileHandlerReset(); + VerifyUnregistration(first_tick_count); + VerifyUnregistration(second_tick_count); + VerifyDisabled(); +} + +// Verifies that ProfileHandler correctly handles a case where a callback was +// registered before the second thread started. +TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { + // Stop the worker. + StopWorker(); + // Unregister all existing callbacks, stop the timer (if shared), disable + // the signal handler and reset the timer sharing state in the Profile + // Handler. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + + // Start the worker. At this time ProfileHandler doesn't know if timers are + // shared as only one thread has registered so far. + StartWorker(); + // Register a callback and check that profile ticks are being delivered. + int tick_count; + ProfileHandlerRegisterCallback(TickCounter, &tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + VerifyRegistration(tick_count); + + // Register a second thread and verify that timer and signal handler are + // correctly enabled. 
+ RegisterThread(); + EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_TRUE(IsTimerEnabled()); + EXPECT_TRUE(IsSignalEnabled()); +} + +} // namespace + +int main(int argc, char** argv) { + return ProfileHandlerTest::RUN_ALL_TESTS(); +} diff --git a/src/tests/profiledata_unittest.cc b/src/tests/profiledata_unittest.cc index 679b9e2..31ba3b6 100644 --- a/src/tests/profiledata_unittest.cc +++ b/src/tests/profiledata_unittest.cc @@ -54,6 +54,7 @@ using std::string; #define EXPECT_TRUE(cond) CHECK(cond) #define EXPECT_FALSE(cond) CHECK(!(cond)) #define EXPECT_EQ(a, b) CHECK_EQ(a, b) +#define EXPECT_NE(a, b) CHECK_NE(a, b) #define EXPECT_GT(a, b) CHECK_GT(a, b) #define EXPECT_LT(a, b) CHECK_LT(a, b) #define EXPECT_GE(a, b) CHECK_GE(a, b) @@ -64,9 +65,41 @@ using std::string; namespace { +template<typename T> class scoped_array { + public: + scoped_array(T* data) : data_(data) { } + ~scoped_array() { delete[] data_; } + T* get() { return data_; } + T& operator[](int i) { return data_[i]; } + private: + T* const data_; +}; + // Re-runs fn until it doesn't cause EINTR. #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) +// Read up to "count" bytes from file descriptor "fd" into the buffer +// starting at "buf" while handling short reads and EINTR. On +// success, return the number of bytes read. Otherwise, return -1. +static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { + CHECK_GE(fd, 0); + char *buf0 = reinterpret_cast<char *>(buf); + ssize_t num_bytes = 0; + while (num_bytes < count) { + ssize_t len; + NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); + if (len < 0) { // There was an error other than EINTR. + return -1; + } + if (len == 0) { // Reached EOF. + break; + } + num_bytes += len; + } + CHECK(num_bytes <= count); + return num_bytes; +} + // Thin wrapper around a file descriptor so that the file descriptor // gets closed for sure. struct FileDescriptor { @@ -87,6 +120,9 @@ typedef uintptr_t ProfileDataSlot; // sample. 
inline void* V(intptr_t x) { return reinterpret_cast<void*>(x); } +// String returned by ProfileDataChecker helper functions to indicate success. +const char kNoError[] = ""; + class ProfileDataChecker { public: ProfileDataChecker() { @@ -99,35 +135,194 @@ class ProfileDataChecker { string filename() const { return filename_; } - void Check(const ProfileDataSlot* slots, int num_slots) { - CheckWithSkips(slots, num_slots, NULL, 0); + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots'. Returns kNoError if the + // data matched, otherwise returns an indication of the cause of the + // mismatch. + string Check(const ProfileDataSlot* slots, int num_slots) { + return CheckWithSkips(slots, num_slots, NULL, 0); } - void CheckWithSkips(const ProfileDataSlot* slots, int num_slots, - const int* skips, int num_skips) { - FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); - CHECK_GE(fd.get(), 0); - - ProfileDataSlot* filedata = new ProfileDataSlot[num_slots]; - size_t expected_bytes = num_slots * sizeof filedata[0]; - ssize_t bytes_read = read(fd.get(), filedata, expected_bytes); - CHECK_EQ(expected_bytes, bytes_read); - - for (int i = 0; i < num_slots; i++) { - if (num_skips > 0 && *skips == i) { - num_skips--; - skips++; - continue; - } - CHECK_EQ(slots[i], filedata[i]); // "first mismatch at slot " << i; - } - delete[] filedata; - } + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots', skipping over entries + // described by 'skips' and 'num_skips'. + // + // 'skips' must be a sorted list of (0-based) slot numbers to be + // skipped, of length 'num_skips'. Note that 'num_slots' includes + // any skipped slots, i.e., the first 'num_slots' profile data slots + // will be considered, but some may be skipped. + // + // Returns kNoError if the data matched, otherwise returns an + // indication of the cause of the mismatch. 
+ string CheckWithSkips(const ProfileDataSlot* slots, int num_slots, + const int* skips, int num_skips); + + // Validate that a profile is correctly formed. The profile is + // assumed to have been created by the same kind of binary (e.g., + // same slot size, same endian, etc.) as is validating the profile. + // + // Returns kNoError if the profile appears valid, otherwise returns + // an indication of the problem with the profile. + string ValidateProfile(); private: string filename_; }; +string ProfileDataChecker::CheckWithSkips(const ProfileDataSlot* slots, + int num_slots, const int* skips, + int num_skips) { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + scoped_array<ProfileDataSlot> filedata(new ProfileDataSlot[num_slots]); + size_t expected_bytes = num_slots * sizeof filedata[0]; + ssize_t bytes_read = ReadPersistent(fd.get(), filedata.get(), expected_bytes); + if (expected_bytes != bytes_read) + return "file too small"; + + for (int i = 0; i < num_slots; i++) { + if (num_skips > 0 && *skips == i) { + num_skips--; + skips++; + continue; + } + if (slots[i] != filedata[i]) + return "data mismatch"; + } + return kNoError; +} + +string ProfileDataChecker::ValidateProfile() { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + struct stat statbuf; + if (fstat(fd.get(), &statbuf) != 0) + return "fstat error"; + if (statbuf.st_size != static_cast<ssize_t>(statbuf.st_size)) + return "file impossibly large"; + ssize_t filesize = statbuf.st_size; + + scoped_array<char> filedata(new char[filesize]); + if (ReadPersistent(fd.get(), filedata.get(), filesize) != filesize) + return "read of whole file failed"; + + // Must have enough data for the header and the trailer. 
+ if (filesize < (5 + 3) * sizeof(ProfileDataSlot)) + return "not enough data in profile for header + trailer"; + + // Check the header + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[0] != 0) + return "error in header: non-zero count"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[1] != 3) + return "error in header: num_slots != 3"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[2] != 0) + return "error in header: non-zero format version"; + // Period (slot 3) can have any value. + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[4] != 0) + return "error in header: non-zero padding value"; + ssize_t cur_offset = 5 * sizeof(ProfileDataSlot); + + // While there are samples, skip them. Each sample consists of + // at least three slots. + bool seen_trailer = false; + while (!seen_trailer) { + if (cur_offset > filesize - 3 * sizeof(ProfileDataSlot)) + return "truncated sample header"; + ProfileDataSlot* sample = + reinterpret_cast<ProfileDataSlot*>(filedata.get() + cur_offset); + ProfileDataSlot slots_this_sample = 2 + sample[1]; + ssize_t size_this_sample = slots_this_sample * sizeof(ProfileDataSlot); + if (cur_offset > filesize - size_this_sample) + return "truncated sample"; + + if (sample[0] == 0 && sample[1] == 1 && sample[2] == 0) { + seen_trailer = true; + } else { + if (sample[0] < 1) + return "error in sample: sample count < 1"; + if (sample[1] < 1) + return "error in sample: num_pcs < 1"; + for (int i = 2; i < slots_this_sample; i++) { + if (sample[i] == 0) + return "error in sample: NULL PC"; + } + } + cur_offset += size_this_sample; + } + + // There must be at least one line in the (text) list of mapped objects, + // and it must be terminated by a newline. Note, the use of newline + // here and below Might not be reasonable on non-UNIX systems. 
+ if (cur_offset >= filesize) + return "no list of mapped objects"; + if (filedata[filesize - 1] != '\n') + return "profile did not end with a complete line"; + + while (cur_offset < filesize) { + char* line_start = filedata.get() + cur_offset; + + // Find the end of the line, and replace it with a NUL for easier + // scanning. + char* line_end = strchr(line_start, '\n'); + *line_end = '\0'; + + // Advance past any leading space. It's allowed in some lines, + // but not in others. + bool has_leading_space = false; + char* line_cur = line_start; + while (*line_cur == ' ') { + has_leading_space = true; + line_cur++; + } + + bool found_match = false; + + // Check for build lines. + if (!found_match) { + found_match = (strncmp(line_cur, "build=", 6) == 0); + // Anything may follow "build=", and leading space is allowed. + } + + // A line from ProcMapsIterator::FormatLine, of the form: + // + // 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + // + // Leading space is not allowed. The filename may be omitted or + // may consist of multiple words, so we scan only up to the + // space before the filename. + if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x %*c%*c%*c%*c %*x %*x:%*x %*d %n", + &chars_scanned); + found_match = (chars_scanned > 0 && !has_leading_space); + } + + // A line from DumpAddressMap, of the form: + // + // 40000000-40015000: /lib/ld-2.3.2.so + // + // Leading space is allowed. The filename may be omitted or may + // consist of multiple words, so we scan only up to the space + // before the filename. 
+ if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x: %n", &chars_scanned); + found_match = (chars_scanned > 0); + } + + if (!found_match) + return "unrecognized line in text section"; + + cur_offset += (line_end - line_start) + 1; + } + + return kNoError; +} + class ProfileDataTest { protected: void ExpectStopped() { @@ -162,6 +357,7 @@ class ProfileDataTest { void CollectOne(); void CollectTwoMatching(); void CollectTwoFlush(); + void StartResetRestart(); public: #define RUN(test) do { \ @@ -178,6 +374,7 @@ class ProfileDataTest { RUN(CollectOne); RUN(CollectTwoMatching); RUN(CollectTwoFlush); + RUN(StartResetRestart); return 0; } }; @@ -226,7 +423,8 @@ TEST_F(ProfileDataTest, StartStopEmpty) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } // Start and Stop with no options, collecting no samples. Verify @@ -246,8 +444,10 @@ TEST_F(ProfileDataTest, StartStopNoOptionsEmpty) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.CheckWithSkips(slots, arraysize(slots), - slots_to_skip, arraysize(slots_to_skip)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.CheckWithSkips(slots, arraysize(slots), + slots_to_skip, + arraysize(slots_to_skip))); } // Start after already started. Should return false and not impact @@ -275,7 +475,8 @@ TEST_F(ProfileDataTest, StartWhenStarted) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } // Like StartStopEmpty, but uses a different file name and frequency. 
@@ -293,7 +494,8 @@ TEST_F(ProfileDataTest, StartStopEmpty2) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectOne) { @@ -316,7 +518,8 @@ TEST_F(ProfileDataTest, CollectOne) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectTwoMatching) { @@ -341,7 +544,8 @@ TEST_F(ProfileDataTest, CollectTwoMatching) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectTwoFlush) { @@ -370,7 +574,41 @@ TEST_F(ProfileDataTest, CollectTwoFlush) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +// Start then reset, verify that the result is *not* a valid profile. +// Then start again and make sure the result is OK. +TEST_F(ProfileDataTest, StartResetRestart) { + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(1); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Reset(); + ExpectStopped(); + // We expect the resulting file to be empty. This is a minimal test + // of ValidateProfile. + EXPECT_NE(kNoError, checker_.ValidateProfile()); + + struct stat statbuf; + EXPECT_EQ(0, stat(checker_.filename().c_str(), &statbuf)); + EXPECT_EQ(0, statbuf.st_size); + + const int frequency = 2; // Different frequency than used above. 
+ ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } } // namespace diff --git a/src/tests/sampling_test.cc b/src/tests/sampling_test.cc index 6845574..b75e70e 100644 --- a/src/tests/sampling_test.cc +++ b/src/tests/sampling_test.cc @@ -64,7 +64,7 @@ int main(int argc, char** argv) { fprintf(stderr, "USAGE: %s <base of output files>\n", argv[0]); exit(1); } - for (int i = 0; i < 9000; i++) { + for (int i = 0; i < 8000; i++) { AllocateAllocate(); } diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh index 9e45f67..149d27b 100755 --- a/src/tests/sampling_test.sh +++ b/src/tests/sampling_test.sh @@ -52,7 +52,15 @@ OUTDIR="/tmp/sampling_test_dir" # libtool is annoying, and puts the actual executable in a different # directory, replacing the seeming-executable with a shell script. # We use the error output of sampling_test to indicate its real location -SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '{print $2; exit;}'` +SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '/USAGE/ {print $2; exit;}'` + +# A kludge for cygwin. Unfortunately, 'test -f' says that 'foo' exists +# even when it doesn't, and only foo.exe exists. Other unix utilities +# (like nm) need you to say 'foo.exe'. We use one such utility, cat, to +# see what the *real* binary name is. +if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then + SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe +fi die() { echo "FAILED" @@ -66,20 +74,20 @@ rm -rf "$OUTDIR" || die "Unable to delete $OUTDIR" mkdir "$OUTDIR" || die "Unable to create $OUTDIR" # This puts the output into out.heap and out.growth. 
It allocates -# 9*10^7 bytes of memory, which is 85M. Because we sample, the +# 8*10^7 bytes of memory, which is 76M. Because we sample, the # estimate may be a bit high or a bit low: we accept anything from -# 70M to 99M. +# 50M to 99M. "$SAMPLING_TEST" "$OUTDIR/out" echo -n "Testing heap output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ - | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"` echo "OK" echo -n "Testing growth output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ - | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"` echo "OK" diff --git a/src/tests/testutil.cc b/src/tests/testutil.cc index 6643443..f2b8592 100644 --- a/src/tests/testutil.cc +++ b/src/tests/testutil.cc @@ -99,7 +99,9 @@ extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int) { #elif defined(_WIN32) +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif #include <windows.h> extern "C" { diff --git a/src/thread_cache.cc b/src/thread_cache.cc index d2b0c4f..a1fdf0f 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -489,16 +489,23 @@ void ThreadCache::RecomputePerThreadCacheSize() { double ratio = space / max<double>(1, per_thread_cache_size_); size_t claimed = 0; for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - // Don't circumvent the slow-start growth of max_size_ by increasing - // the total cache size. - if (!use_dynamic_cache_size_ || ratio < 1.0) { - h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + if (use_dynamic_cache_size_) { + // Don't circumvent the slow-start growth of max_size_ by increasing the + // total cache size. 
+ if (ratio < 1.0) { + h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + } + } else { + // Don't try to be clever and multiply by 'ratio' because rounding + // errors will eventually cause long-lived threads to have zero + // max_size_. + h->max_size_ = space; } claimed += h->max_size_; } unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; per_thread_cache_size_ = space; - //MESSAGE("Threads %d => cache size %8d\n", n, int(space)); + // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space)); } void ThreadCache::Print(TCMalloc_Printer* out) const { diff --git a/src/windows/addr2line-pdb.c b/src/windows/addr2line-pdb.c index 5384731..97b614b 100644 --- a/src/windows/addr2line-pdb.c +++ b/src/windows/addr2line-pdb.c @@ -45,7 +45,6 @@ #include <windows.h> #include <dbghelp.h> - #define SEARCH_CAP (1024*1024) #define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" diff --git a/src/windows/config.h b/src/windows/config.h index 6be561e..2811296 100644 --- a/src/windows/config.h +++ b/src/windows/config.h @@ -12,6 +12,13 @@ #ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ #define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. + * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +#undef WIN32_OVERRIDE_ALLOCATORS + /* the location of <hash_map> */ #define HASH_MAP_H <hash_map> @@ -21,6 +28,9 @@ /* the location of <hash_set> */ #define HASH_SET_H <hash_set> +/* Define to 1 if your libc has a snprintf implementation */ +#undef HAVE_SNPRINTF + /* Define to 1 if compiler supports __builtin_stack_pointer */ #undef HAVE_BUILTIN_STACK_POINTER @@ -162,9 +172,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <windows.h> header file. 
*/ -#define HAVE_WINDOWS_H 1 - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ @@ -177,6 +184,9 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + /* Name of package */ #undef PACKAGE diff --git a/src/windows/mingw.h b/src/windows/mingw.h index 1745723..e69b5da 100644 --- a/src/windows/mingw.h +++ b/src/windows/mingw.h @@ -47,6 +47,8 @@ #include "windows/port.h" +#define HAVE_SNPRINTF 1 + #endif /* __MINGW32__ */ #endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */ diff --git a/src/windows/nm-pdb.c b/src/windows/nm-pdb.c index ec0ddf9..726d345 100644 --- a/src/windows/nm-pdb.c +++ b/src/windows/nm-pdb.c @@ -41,6 +41,7 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> // for _strdup #include <windows.h> #include <dbghelp.h> diff --git a/src/windows/override_functions.cc b/src/windows/override_functions.cc new file mode 100644 index 0000000..2ad6bbc --- /dev/null +++ b/src/windows/override_functions.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Mike Belshe +// +// To link tcmalloc into a EXE or DLL statically without using the patching +// facility, we can take a stock libcmt and remove all the allocator functions. +// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few +// functions are missing. This file contains the additional overrides which +// are required in the VS2005 libcmt in order to link the modified libcmt. +// +// See also +// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + +#include "config.h" + +#ifndef _WIN32 +# error You should only be including this file in a windows environment! 
+#endif + +#ifndef WIN32_OVERRIDE_ALLOCATORS +# error This file is intended for use when overriding allocators +#endif + +#include "tcmalloc.cc" + +extern "C" void* _recalloc(void* p, size_t n, size_t size) { + void* result = realloc(p, n * size); + memset(result, 0, n * size); + return result; +} + +extern "C" void* _calloc_impl(size_t n, size_t size) { + return calloc(n, size); +} + +extern "C" size_t _msize(void* p) { + return MallocExtension::instance()->GetAllocatedSize(p); +} + +extern "C" intptr_t _get_heap_handle() { + return 0; +} + +// The CRT heap initialization stub. +extern "C" int _heap_init() { + // We intentionally leak this object. It lasts for the process + // lifetime. Trying to teardown at _heap_term() is so late that + // you can't do anything useful anyway. + new TCMallocGuard(); + return 1; +} + +// The CRT heap cleanup stub. +extern "C" void _heap_term() { +} + +#ifndef NDEBUG +#undef malloc +#undef free +#undef calloc +int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) { + return 0; +} + +int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) { + return 0; +} + +int _CrtSetReportMode(int, int) { + return 0; +} + +extern "C" void* _malloc_dbg(size_t size, int , const char*, int) { + return malloc(size); +} + +extern "C" void _free_dbg(void* ptr, int) { + free(ptr); +} + +extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) { + return calloc(n, size); +} +#endif // NDEBUG + +// We set this to 1 because part of the CRT uses a check of _crtheap != 0 +// to test whether the CRT has been initialized. Once we've ripped out +// the allocators from libcmt, we need to provide this definition so that +// the rest of the CRT is still usable. 
+extern "C" void* _crtheap = reinterpret_cast<void*>(1); diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc index d73c064..ff5acad 100644 --- a/src/windows/patch_functions.cc +++ b/src/windows/patch_functions.cc @@ -28,7 +28,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // --- -// Author: Craig Silversteion +// Author: Craig Silverstein // // The main purpose of this file is to patch the libc allocation // routines (malloc and friends, but also _msize and other @@ -67,6 +67,11 @@ #endif #include "config.h" + +#ifdef WIN32_OVERRIDE_ALLOCATORS +#error This file is intended for patching allocators - use override_functions.cc instead. +#endif + #include <windows.h> #include <malloc.h> // for _msize and _expand #include <tlhelp32.h> // for CreateToolhelp32Snapshot() @@ -476,7 +481,7 @@ void LibcInfoWithPatchFunctions<T>::Unpatch() { } void WindowsInfo::Patch() { - HMODULE hkernel32 = ::GetModuleHandle("kernel32"); + HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); CHECK_NE(hkernel32, NULL); // Unlike for libc, we know these exist in our module, so we can get @@ -693,8 +698,10 @@ void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( (void (*)(void*))origstub_fn_[kFree]); return NULL; } - return do_realloc_with_callback(old_ptr, new_size, ((void* (*)(void*, size_t)) - origstub_fn_[kRealloc])); + return do_realloc_with_callback( + old_ptr, new_size, + (void (*)(void*))origstub_fn_[kFree], + (size_t (*)(void*))origstub_fn_[k_Msize]); } template<int T> @@ -772,28 +779,7 @@ void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( template<int T> size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { - // Get the size of the old entry - const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - Span *span = NULL; - size_t old_size; - if (cl == 0) { - span = Static::pageheap()->GetDescriptor(p); - if (!span) { - // 
This can happen on windows because some constructors may - // construct things before tcmalloc hooks _msize(). - return ((size_t (*)(void*))origstub_fn_[k_Msize])(ptr); - } - cl = span->sizeclass; - Static::pageheap()->CacheSizeClass(p, cl); - } - if (cl != 0) { - old_size = Static::sizemap()->ByteSizeForClass(cl); - } else { - ASSERT(span != NULL); - old_size = span->length << kPageShift; - } - return old_size; + return GetSizeWithCallback(ptr, (size_t (*)(void*))origstub_fn_[k_Msize]); } // We need to define this because internal windows functions like to diff --git a/src/windows/port.cc b/src/windows/port.cc index 7cb3a57..0f1a700 100644 --- a/src/windows/port.cc +++ b/src/windows/port.cc @@ -55,8 +55,7 @@ int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { return _vsnprintf(str, size-1, format, ap); } -// mingw defines its own snprintf, though msvc does not -#ifndef __MINGW32__ +#ifndef HAVE_SNPRINTF int snprintf(char *str, size_t size, const char *format, ...) { va_list ap; va_start(ap, format); diff --git a/src/windows/port.h b/src/windows/port.h index e5b9b5f..50866ec 100644 --- a/src/windows/port.h +++ b/src/windows/port.h @@ -49,7 +49,9 @@ #ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif #include <windows.h> #include <io.h> /* because we so often use open/close/etc */ #include <stdarg.h> /* for va_list */ |