diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2009-04-18 00:02:25 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2009-04-18 00:02:25 +0000 |
commit | beb6a9a183c1ca25c99e4401b58266ce73b8c846 (patch) | |
tree | b02a2cfe46761e177303c1dbaf420f7cfb14642f /src | |
parent | edd03a831f350bc72d76d4fad2b390d43faccb79 (diff) | |
download | gperftools-beb6a9a183c1ca25c99e4401b58266ce73b8c846.tar.gz |
Fri Apr 17 16:40:48 2009 Google Inc. <opensource@google.com>
* google-perftools: version 1.2 release
* Allow large_alloc_threshold=0 to turn it off entirely (csilvers)
* Die more helpfully when out of memory for internal data (csilvers)
* Refactor profile-data gathering, add a new unittest (cgd, nabeelmian)
* BUGFIX: fix rounding errors with static thread-size caches (addi)
* BUGFIX: disable hooks better when forking in leak-checker (csilvers)
* BUGFIX: fix realloc of crt pointers on windows (csilvers)
* BUGFIX: do a better job of finding binaries in .sh tests (csilvers)
* WINDOWS: allow overriding malloc/etc instead of patching (mbelshe)
* PORTING: fix compilation error in a ppc-specific file (csilvers)
* PORTING: deal with quirks in cygwin's /proc/self/maps (csilvers)
* PORTING: use 'A' version of functions for ascii input (mbelshe)
* PORTING: generate .so's on cygwin and mingw (ajenjo)
* PORTING: disable profiler methods on cygwin (jperkins)
* Updated autoconf version to 2.61 and libtool version to 1.5.26
git-svn-id: http://gperftools.googlecode.com/svn/trunk@68 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src')
38 files changed, 1931 insertions, 567 deletions
diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h index dcf143e..d8e23fe 100644 --- a/src/base/atomicops-internals-linuxppc.h +++ b/src/base/atomicops-internals-linuxppc.h @@ -407,9 +407,4 @@ inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { } // namespace base::subtle } // namespace base -// NOTE(vchen): The following is also deprecated. New callers should use -// the base::subtle namespace. -inline void MemoryBarrier() { - base::subtle::MemoryBarrier(); -} #endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/src/base/logging.cc b/src/base/logging.cc index 2f56fce..a68401c 100644 --- a/src/base/logging.cc +++ b/src/base/logging.cc @@ -39,7 +39,7 @@ DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0), "--verbose == -4 means we log fatal errors only."); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) // While windows does have a POSIX-compatible API // (_open/_write/_close), it acquires memory. Using this lower-level @@ -49,8 +49,8 @@ RawFD RawOpenForWriting(const char* filename) { // that ever becomes a problem then we ought to compute the absolute // path on its behalf (perhaps the ntdll/kernel function isn't aware // of the working directory?) 
- RawFD fd = CreateFile(filename, GENERIC_WRITE, 0, NULL, - CREATE_ALWAYS, 0, NULL); + RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL, + CREATE_ALWAYS, 0, NULL); if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS) SetEndOfFile(fd); // truncate the existing file return fd; @@ -71,7 +71,7 @@ void RawClose(RawFD handle) { CloseHandle(handle); } -#else // _WIN32 +#else // _WIN32 || __CYGWIN__ || __CYGWIN32__ #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> @@ -104,4 +104,4 @@ void RawClose(RawFD fd) { NO_INTR(close(fd)); } -#endif // _WIN32 +#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__ diff --git a/src/base/logging.h b/src/base/logging.h index 77ee988..bc1a4c2 100644 --- a/src/base/logging.h +++ b/src/base/logging.h @@ -208,14 +208,14 @@ inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { // to allow even more low-level stuff in the future. // Like other "raw" routines, these functions are best effort, and // thus don't return error codes (except RawOpenForWriting()). 
-#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) #include <windows.h> typedef HANDLE RawFD; const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; #else typedef int RawFD; const RawFD kIllegalRawFD = -1; // what open returns if it fails -#endif // _WIN32 +#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) RawFD RawOpenForWriting(const char* filename); // uses default permissions void RawWrite(RawFD fd, const char* buf, size_t len); diff --git a/src/base/simple_mutex.h b/src/base/simple_mutex.h index d59f5a0..0eed34f 100644 --- a/src/base/simple_mutex.h +++ b/src/base/simple_mutex.h @@ -95,8 +95,10 @@ #if defined(NO_THREADS) typedef int MutexType; // to keep a lock-count -#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__) -# define WIN32_LEAN_AND_MEAN // We only need minimal includes +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN // We only need minimal includes +# endif // We need Windows NT or later for TryEnterCriticalSection(). If you // don't need that functionality, you can remove these _WIN32_WINNT // lines, and change TryLock() to assert(0) or something. @@ -152,7 +154,7 @@ class Mutex { inline void SetIsSafe() { is_safe_ = true; } // Catch the error of writing Mutex when intending MutexLock. 
- Mutex(Mutex *ignored) {} + Mutex(Mutex* /*ignored*/) {} // Disallow "evil" constructors Mutex(const Mutex&); void operator=(const Mutex&); @@ -180,7 +182,7 @@ bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } void Mutex::ReaderLock() { assert(++mutex_ > 0); } void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } -#elif defined(_WIN32) || defined(__CYGWIN32__) || defined(__CYGWIN64__) +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) Mutex::Mutex() { InitializeCriticalSection(&mutex_); SetIsSafe(); } Mutex::~Mutex() { DeleteCriticalSection(&mutex_); } @@ -206,7 +208,8 @@ Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy); } void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } bool Mutex::TryLock() { return is_safe_ ? - pthread_rwlock_trywrlock(&mutex_) == 0 : true; } + pthread_rwlock_trywrlock(&mutex_) == 0 : + true; } void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } #undef SAFE_PTHREAD diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc index 1f542ae..a6bd3a0 100644 --- a/src/base/sysinfo.cc +++ b/src/base/sysinfo.cc @@ -31,6 +31,10 @@ // Author: Mike Burrows #include "config.h" +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define OS_WINDOWS 1 +#endif + #include <stdlib.h> // for getenv() #include <stdio.h> // for snprintf(), sscanf() #include <string.h> // for memmove(), memchr(), etc. 
@@ -48,7 +52,7 @@ #include <sys/sysctl.h> #elif defined __sun__ // Solaris #include <procfs.h> // for, e.g., prmap_t -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) #include <process.h> // for getpid() (actually, _getpid()) #include <shlwapi.h> // for SHGetValueA() #include <tlhelp32.h> // for Module32First() @@ -58,7 +62,7 @@ #include "base/logging.h" #include "base/cycleclock.h" -#ifdef _WIN32 +#ifdef OS_WINDOWS #ifdef MODULEENTRY32 // In a change from the usual W-A pattern, there is no A variant of // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. @@ -75,7 +79,7 @@ #ifndef TH32CS_SNAPMODULE32 #define TH32CS_SNAPMODULE32 0 #endif /* TH32CS_SNAPMODULE32 */ -#endif /* _WIN32 */ +#endif /* OS_WINDOWS */ // Re-run fn until it doesn't cause EINTR. #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) @@ -177,7 +181,7 @@ static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous static int cpuinfo_num_cpus = 1; // Conservative guess static void SleepForMilliseconds(int milliseconds) { -#ifdef _WIN32 +#ifdef OS_WINDOWS _sleep(milliseconds); // Windows's _sleep takes milliseconds argument #else // Sleep for a few milliseconds @@ -334,7 +338,7 @@ static void InitializeSystemInfo() { } // TODO(csilvers): also figure out cpuinfo_num_cpus -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) # pragma comment(lib, "shlwapi.lib") // for SHGetValue() // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. 
@@ -410,7 +414,7 @@ bool HasPosixThreads() { if (confstr(_CS_GNU_LIBPTHREAD_VERSION, buf, sizeof(buf)) == 0) return false; return strncmp(buf, "NPTL", 4) == 0; -#elif defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#elif defined(OS_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__) return false; #else // other OS return true; // Assume that everything else has Posix @@ -492,7 +496,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, #elif defined(__MACH__) current_image_ = _dyld_image_count(); // count down from the top current_load_cmd_ = -1; -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, GetCurrentProcessId()); @@ -504,7 +508,7 @@ void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, } ProcMapsIterator::~ProcMapsIterator() { -#if defined(_WIN32) || defined(__MINGW32__) +#if defined(OS_WINDOWS) if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); #elif defined(__MACH__) // no cleanup necessary! @@ -515,7 +519,7 @@ ProcMapsIterator::~ProcMapsIterator() { } bool ProcMapsIterator::Valid() const { -#if defined(_WIN32) || defined(__MINGW32__) +#if defined(OS_WINDOWS) return snapshot_ != INVALID_HANDLE_VALUE; #elif defined(__MACH__) return 1; @@ -579,7 +583,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, int64 tmpinode; int major, minor; unsigned filename_offset = 0; -#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#if defined(__linux__) // for now, assume all linuxes have the same format if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n", start ? start : &tmpstart, @@ -588,6 +592,24 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, offset ? offset : &tmpoffset, &major, &minor, inode ? 
inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__CYGWIN__) || defined(__CYGWIN32__) + // cygwin is like linux, except the third field is the "entry point" + // rather than the offset (see format_process_maps at + // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src + // Offset is always be 0 on cygwin: cygwin implements an mmap + // by loading the whole file and then calling NtMapViewOfSection. + // Cygwin also seems to set its flags kinda randomly; use windows default. + char tmpflags[5]; + if (offset) + *offset = 0; + strcpy(flags_, "r-xp"); + if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? start : &tmpstart, + end ? end : &tmpend, + tmpflags, + &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset) != 7) continue; #elif defined(__FreeBSD__) // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup tmpstart = tmpend = tmpoffset = 0; @@ -722,7 +744,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // If we get here, no more load_cmd's in this image talk about // segments. Go on to the next image. 
} -#elif defined(_WIN32) || defined(__MINGW32__) +#elif defined(OS_WINDOWS) static char kDefaultPerms[5] = "r-xp"; BOOL ok; if (module_.dwSize == 0) { // only possible before first call diff --git a/src/base/sysinfo.h b/src/base/sysinfo.h index 86d998c..fb276eb 100644 --- a/src/base/sysinfo.h +++ b/src/base/sysinfo.h @@ -39,7 +39,7 @@ #include "config.h" #include <time.h> -#if defined(_WIN32) || defined(__MINGW32__) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) #include <windows.h> // for DWORD #include <TlHelp32.h> // for CreateToolhelp32Snapshot #endif @@ -190,7 +190,7 @@ class ProcMapsIterator { char *etext_; // end of text char *nextline_; // start of next line char *ebuf_; // end of buffer (1 char for a nul) -#if defined(_WIN32) || defined(__MINGW32__) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) HANDLE snapshot_; // filehandle on dll info // In a change from the usual W-A pattern, there is no A variant of // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. diff --git a/src/config.h.in b/src/config.h.in index d225d49..bfac21c 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -150,9 +150,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <windows.h> header file. */ -#undef HAVE_WINDOWS_H - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ @@ -165,6 +162,9 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + /* Name of package */ #undef PACKAGE diff --git a/src/google/heap-checker.h b/src/google/heap-checker.h index 89a2512..acedd46 100644 --- a/src/google/heap-checker.h +++ b/src/google/heap-checker.h @@ -120,41 +120,22 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // has been called at least once). 
~HeapLeakChecker(); - // Return true iff the heap does not have more objects allocated - // w.r.t. its state at the time of our construction. - // This does full pprof heap change checking and reporting. - // To detect tricky leaks it depends on correct working pprof implementation - // referred by FLAGS_heap_profile_pprof. - // (By 'tricky leaks' we mean a change of heap state that e.g. for SameHeap - // preserves the number of allocated objects and bytes - // -- see TestHeapLeakCheckerTrick in heap-checker_unittest.cc -- - // and thus is not detected by BriefNoLeaks.) - // CAVEAT: pprof will do no checking over stripped binaries - // (our automatic test binaries are stripped) - // NOTE: All *NoLeaks() and *SameHeap() methods can be called many times - // to check for leaks at different end-points in program's execution. - bool NoLeaks() { return DoNoLeaks(NO_LEAKS, USE_PPROF, PPROF_REPORT); } - - // Return true iff the heap does not seem to have more objects allocated - // w.r.t. its state at the time of our construction - // by looking at the number of objects & bytes allocated. - // This also tries to do pprof reporting of detected leaks. - bool QuickNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, PPROF_REPORT); } - - // Return true iff the heap does not seem to have more objects allocated - // w.r.t. its state at the time of our construction - // by looking at the number of objects & bytes allocated. - // This does not try to use pprof at all. - bool BriefNoLeaks() { return DoNoLeaks(NO_LEAKS, USE_COUNTS, NO_REPORT); } - - // These are similar to their *NoLeaks counterparts, - // but they in addition require no negative leaks, - // i.e. the state of the heap must be exactly the same - // as at the time of our construction. 
- bool SameHeap() { return DoNoLeaks(SAME_HEAP, USE_PPROF, PPROF_REPORT); } - bool QuickSameHeap() - { return DoNoLeaks(SAME_HEAP, USE_COUNTS, PPROF_REPORT); } - bool BriefSameHeap() { return DoNoLeaks(SAME_HEAP, USE_COUNTS, NO_REPORT); } + // These used to be different but are all the same now: they return + // true iff all memory allocated since this HeapLeakChecker object + // was constructor is still reachable from global state. + // + // Because we fork to convert addresses to symbol-names, and forking + // is not thread-safe, and we may be called in a threaded context, + // we do not try to symbolize addresses when called manually. + bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } + + // These forms are obsolete; use NoLeaks() instead. + // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED. + bool QuickNoLeaks() { return NoLeaks(); } + bool BriefNoLeaks() { return NoLeaks(); } + bool SameHeap() { return NoLeaks(); } + bool QuickSameHeap() { return NoLeaks(); } + bool BriefSameHeap() { return NoLeaks(); } // Detailed information about the number of leaked bytes and objects // (both of these can be negative as well). @@ -231,15 +212,10 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { // Helper for constructors void Create(const char *name, bool make_start_snapshot); - // Types for DoNoLeaks and its helpers. - enum CheckType { SAME_HEAP, NO_LEAKS }; - enum CheckFullness { USE_PPROF, USE_COUNTS }; - enum ReportMode { PPROF_REPORT, NO_REPORT }; + enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE }; // Helper for *NoLeaks and *SameHeap - bool DoNoLeaks(CheckType check_type, - CheckFullness fullness, - ReportMode report_mode); + bool DoNoLeaks(ShouldSymbolize should_symbolize); // These used to be public, but they are now deprecated. // Will remove entirely when all internal uses are fixed. 
diff --git a/src/google/profiler.h b/src/google/profiler.h index be7dbf3..74b936f 100644 --- a/src/google/profiler.h +++ b/src/google/profiler.h @@ -146,9 +146,7 @@ PERFTOOLS_DLL_DECL void ProfilerDisable(); /* Returns nonzero if profile is currently enabled, zero if it's not. */ PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(); -/* Routine for registering new threads with the profiler. This routine - * is called by the Thread module in google3/thread whenever a new - * thread is created. +/* Routine for registering new threads with the profiler. */ PERFTOOLS_DLL_DECL void ProfilerRegisterThread(); diff --git a/src/heap-checker.cc b/src/heap-checker.cc index ef37df2..4c446c1 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -124,9 +124,7 @@ DEFINE_string(heap_check, " or the empty string are the supported choices. " "(See HeapLeakChecker::InternalInitStart for details.)"); -DEFINE_bool(heap_check_report, - EnvToBool("HEAP_CHECK_REPORT", true), - "If overall heap check should report the found leaks via pprof"); +DEFINE_bool(heap_check_report, true, "Obsolete"); DEFINE_bool(heap_check_before_constructors, true, @@ -137,13 +135,7 @@ DEFINE_bool(heap_check_after_destructors, "If overall heap check is to end after global destructors " "or right after all REGISTER_HEAPCHECK_CLEANUP's"); -DEFINE_bool(heap_check_strict_check, - EnvToBool("HEAP_CHECK_STRICT_CHECK", true), - "If overall heap check is to be done " - "via HeapLeakChecker::*SameHeap " - "or HeapLeakChecker::*NoLeaks call"); - // heap_check_strict_check == false - // is useful only when heap_check_before_constructors == false +DEFINE_bool(heap_check_strict_check, true, "Obsolete"); DEFINE_bool(heap_check_ignore_global_live, EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true), @@ -264,6 +256,9 @@ static const int heap_checker_info_level = 0; // The larger it can be, the lesser is the chance of missing real leaks. 
static const size_t kPointerSourceAlignment = sizeof(void*); +// Cancel our InitialMallocHook_* if present. +static void CancelInitialMallocHooks(); // defined below + //---------------------------------------------------------------------- // HeapLeakChecker's own memory allocator that is // independent of the normal program allocator. @@ -573,11 +568,13 @@ enum StackDirection { // Determine which way the stack grows: -static StackDirection ATTRIBUTE_NOINLINE GetStackDirection() { - if (__builtin_frame_address(0) > __builtin_frame_address(1)) - return GROWS_TOWARDS_HIGH_ADDRESSES; - if (__builtin_frame_address(0) < __builtin_frame_address(1)) +static StackDirection ATTRIBUTE_NOINLINE GetStackDirection( + const uintptr_t *const ptr) { + uintptr_t x; + if (&x < ptr) return GROWS_TOWARDS_LOW_ADDRESSES; + if (ptr < &x) + return GROWS_TOWARDS_HIGH_ADDRESSES; RAW_CHECK(0, ""); // Couldn't determine the stack direction. @@ -597,7 +594,7 @@ static void RegisterStackLocked(const void* top_ptr) { // make sure stack_direction is initialized if (stack_direction == UNKNOWN_DIRECTION) { - stack_direction = GetStackDirection(); + stack_direction = GetStackDirection(&top); } // Find memory region with this stack @@ -1454,7 +1451,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { //---------------------------------------------------------------------- char* HeapLeakChecker::MakeProfileNameLocked() { - RAW_DCHECK(lock_.IsHeld(), ""); + RAW_DCHECK(lock_->IsHeld(), ""); RAW_DCHECK(heap_checker_lock.IsHeld(), ""); const int len = profile_name_prefix->size() + strlen(name_) + 5 + strlen(HeapProfileTable::kFileExt) + 1; @@ -1596,14 +1593,23 @@ static void SuggestPprofCommand(const char* pprof_file_arg) { ); } -bool HeapLeakChecker::DoNoLeaks(CheckType check_type, - CheckFullness fullness, - ReportMode report_mode) { +bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { SpinLockHolder l(lock_); // The locking also helps us keep the messages // for the two checks 
close together. SpinLockHolder al(&alignment_checker_lock); + // thread-safe: protected by alignment_checker_lock + static bool have_disabled_hooks_for_symbolize = false; + // Once we've checked for leaks and symbolized the results once, it's + // not safe to do it again. This is because in order to symbolize + // safely, we had to disable all the malloc hooks here, so we no + // longer can be confident we've collected all the data we need. + if (have_disabled_hooks_for_symbolize) { + RAW_LOG(FATAL, "Must not call heap leak checker manually after " + " program-exit's automatic check."); + } + HeapProfileTable::Snapshot* leaks = NULL; char* pprof_file = NULL; @@ -1709,7 +1715,20 @@ bool HeapLeakChecker::DoNoLeaks(CheckType check_type, int64(stats.allocs - stats.frees), int64(stats.alloc_size - stats.free_size)); } else { - leaks->ReportLeaks(name_, pprof_file); + if (should_symbolize == SYMBOLIZE) { + // To turn addresses into symbols, we need to fork, which is a + // problem if both parent and child end up trying to call the + // same malloc-hooks we've set up, at the same time. To avoid + // trouble, we turn off the hooks before symbolizing. Note that + // this makes it unsafe to ever leak-report again! Luckily, we + // typically only want to report once in a program's run, at the + // very end. 
+ CancelInitialMallocHooks(); + have_disabled_hooks_for_symbolize = true; + leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize + } else { + leaks->ReportLeaks(name_, pprof_file, false); + } if (FLAGS_heap_check_identify_leaks) { leaks->ReportIndividualObjects(); } @@ -1854,7 +1873,6 @@ static bool internal_init_start_has_run = false; // (ignore more) FLAGS_heap_check_after_destructors = false; // to after cleanup // (most data is live) - FLAGS_heap_check_strict_check = false; // < profile check (ignore more) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "normal") { @@ -1862,7 +1880,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = false; // to after cleanup // (most data is live) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "strict") { @@ -1871,7 +1888,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = true; // to after destructors // (less data live) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = true; // ignore all live FLAGS_heap_check_ignore_global_live = true; // ignore all live } else if (FLAGS_heap_check == "draconian") { @@ -1879,7 +1895,6 @@ static bool internal_init_start_has_run = false; FLAGS_heap_check_before_constructors = true; // from no profile (fast) FLAGS_heap_check_after_destructors = true; // to after destructors // (need them) - FLAGS_heap_check_strict_check = true; // == profile check (fast) FLAGS_heap_check_ignore_thread_live = false; // no live flood 
(stricter) FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter) } else if (FLAGS_heap_check == "as-is") { @@ -1983,6 +1998,7 @@ bool HeapLeakChecker::DoMainHeapCheck() { RAW_DCHECK(heap_checker_pid == getpid(), ""); do_main_heap_check = false; // will do it now; no need to do it more } + if (!NoGlobalLeaks()) { if (FLAGS_heap_check_identify_leaks) { RAW_LOG(FATAL, "Whole-program memory leaks found."); @@ -2005,15 +2021,14 @@ bool HeapLeakChecker::NoGlobalLeaks() { // we never delete or change main_heap_checker once it's set: HeapLeakChecker* main_hc = GlobalChecker(); if (main_hc) { - CheckType check_type = FLAGS_heap_check_strict_check ? SAME_HEAP : NO_LEAKS; - if (FLAGS_heap_check_before_constructors) check_type = SAME_HEAP; - // NO_LEAKS here just would make it slower in this case - // (we don't use the starting profile anyway) - CheckFullness fullness = check_type == NO_LEAKS ? USE_PPROF : USE_COUNTS; - // use pprof if it can help ignore false leaks - ReportMode report_mode = FLAGS_heap_check_report ? PPROF_REPORT : NO_REPORT; RAW_VLOG(1, "Checking for whole-program memory leaks"); - return main_hc->DoNoLeaks(check_type, fullness, report_mode); + // The program is over, so it's safe to symbolize addresses (which + // requires a fork) because no serious work is expected to be done + // after this. Symbolizing is really useful -- knowing what + // function has a leak is better than knowing just an address -- + // and while we can only safely symbolize once in a program run, + // now is the time (after all, there's no "later" that would be better). + return main_hc->DoNoLeaks(SYMBOLIZE); } return true; } @@ -2034,9 +2049,6 @@ void HeapLeakChecker::CancelGlobalCheck() { static bool in_initial_malloc_hook = false; -// Cancel our InitialMallocHook_* if present. 
-static void CancelInitialMallocHooks(); // defined below - #ifdef HAVE___ATTRIBUTE__ // we need __attribute__((weak)) for this to work #define INSTALLED_INITIAL_MALLOC_HOOKS diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index aaa4a2f..4d0ad8b 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -306,7 +306,7 @@ int HeapProfileTable::UnparseBucket(const Bucket& b, return buflen; } -HeapProfileTable::Bucket** +HeapProfileTable::Bucket** HeapProfileTable::MakeSortedBucketList() const { Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); @@ -602,7 +602,8 @@ static bool Symbolize(void *pc, char *out, int out_size) { } void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, - const char* filename) { + const char* filename, + bool should_symbolize) { // This is only used by the heap leak checker, but is intimately // tied to the allocation map that belongs in this module and is // therefore placed here. @@ -644,7 +645,8 @@ void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, for (int j = 0; j < e.bucket->depth; j++) { const void* pc = e.bucket->stack[j]; const char* sym; - if (Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) { + if (should_symbolize && + Symbolize(const_cast<void*>(pc), sym_buffer, sizeof(sym_buffer))) { sym = sym_buffer; } else { sym = ""; diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index acbe14b..92d237e 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -335,9 +335,12 @@ class HeapProfileTable::Snapshot { // Report anything in this snapshot as a leak. // May use new/delete for temporary storage. + // If should_symbolize is true, will fork (which is not threadsafe) + // to turn addresses into symbol names. Set to false for maximum safety. // Also writes a heap profile to "filename" that contains // all of the objects in this snapshot. 
- void ReportLeaks(const char* checker_name, const char* filename); + void ReportLeaks(const char* checker_name, const char* filename, + bool should_symbolize); // Report the addresses of all leaked objects. // May use new/delete for temporary storage. diff --git a/src/page_heap_allocator.h b/src/page_heap_allocator.h index 1911bc5..20e1ab1 100644 --- a/src/page_heap_allocator.h +++ b/src/page_heap_allocator.h @@ -63,7 +63,11 @@ class PageHeapAllocator { if (free_avail_ < kAlignedSize) { // Need more room free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); - CHECK_CONDITION(free_area_ != NULL); + if (free_area_ == NULL) { + CRASH("FATAL ERROR: Out of memory trying to allocate internal " + "tcmalloc data (%d bytes, object-size %d)\n", + kAllocIncrement, static_cast<int>(sizeof(T))); + } free_avail_ = kAllocIncrement; } result = free_area_; @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.1"; +my $PPROF_VERSION = "1.2"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -649,6 +649,7 @@ sub InteractiveMode { while (1) { print "(pprof) "; $_ = <STDIN>; + last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines # Save some flags that might be reset by InteractiveCommand() @@ -919,7 +920,7 @@ sub PrintCallgrind { map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; [$_, $1, $2] } keys %$calls ) { - my $count = $calls->{$call}; + my $count = int($calls->{$call}); $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = @@ -1714,6 +1715,22 @@ sub IsSecondPcAlwaysTheSame { return $second_pc; } +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. 
+ my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + # Extracts a graph of calls. sub ExtractCalls { my $symbols = shift; @@ -1722,20 +1739,13 @@ sub ExtractCalls { my $calls = {}; while( my ($stack_trace, $count) = each %$profile ) { my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); for (my $i = 1; $i <= $#address; $i++) { - # TODO(csilvers): what should we do if $addresses[$i-1] doesn't exist? - if (exists $symbols->{$address[$i]}) { - my $source = $symbols->{$address[$i]}->[1] . ":" . - $symbols->{$address[$i]}->[0]; - my $destination = $symbols->{$address[$i-1]}->[1] . ":" . - $symbols->{$address[$i-1]}->[0]; - my $call = "$source -> $destination"; - AddEntry($calls, $call, $count); - - if ($i == 1) { - AddEntry($calls, $destination, $count); - } - } + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; } } @@ -2938,7 +2948,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|exe)(\.\d+)*\w*)/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib)(\.\d+)*\w*)/i) { # Full line from /proc/self/maps. 
Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -3250,6 +3260,7 @@ sub GetLineNumbersViaAddr2Line { $count++; } close(SYMBOLS); + return $count; } sub GetLineNumbers { @@ -3268,20 +3279,15 @@ sub GetLineNumbers { # Pass to addr2line my $addr2line = $obj_tool_map{"addr2line"}; - GetLineNumbersViaAddr2Line("$addr2line -f -C -e $image", - $pclist, - $symbols); - - # If the executable is an MS Windows PDB-format executable, we'll - # have set up obj_tool_map{"addr2line_pdb"}. In this case, we - # actually want to use both unix addr2line and windows-specific - # addr2line_pdb, since PDB-format executables can apparently include - # dwarf .o files. + my @addr2line_commands = ("$addr2line -f -C -e $image"); if (exists $obj_tool_map{"addr2line_pdb"}) { my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"}; - GetLineNumbersViaAddr2Line("$addr2line_pdb --demangle -f -C -e $image", - $pclist, - $symbols); + push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image"); + } + foreach my $addr2line_command (@addr2line_commands) { + if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) { + last; + } } } @@ -3550,16 +3556,33 @@ sub GetProcedureBoundaries { my $cppfilt = $obj_tool_map{"c++filt"}; # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm - # binary doesn't support --demangle. For the first, we try with -D - # to at least get *exported* symbols. For the second, we use c++filt - # instead of --demangle. (c++filt is less reliable though, because it - # might interpret nm meta-data as c++ symbols and try to demangle it :-/) - my @nm_commands = ("$nm -n --demangle $image 2>/dev/null", - "$nm -n $image 2>&1 | $cppfilt", - "$nm -D -n --demangle $image 2>/dev/null", - "$nm -D -n $image 2>&1 | $cppfilt", - "$nm -n $image 2>/dev/null", - "$nm -D -n $image 2>/dev/null"); + # binary doesn't support --demangle. 
In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm --demangle <foo>" + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm <foo> | c++filt" + $cppfilt_flag = " | $cppfilt"; + }; + my $flatten_flag = ""; + if (system("$nm -f $image >/dev/null 2>&1") == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). In this case, we actually # want to use both unix nm and windows-specific nm_pdb, since diff --git a/src/profile-handler.cc b/src/profile-handler.cc new file mode 100644 index 0000000..0a9f54c --- /dev/null +++ b/src/profile-handler.cc @@ -0,0 +1,498 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Nabeel Mian +// +// Implements management of profile timers and the corresponding signal handler. + +#include "config.h" +#include "profile-handler.h" + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +#include <stdio.h> +#include <errno.h> +#include <sys/time.h> + +#include <list> +#include <string> + +#include "base/dynamic_annotations.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_threads.h" + +using std::list; +using std::string; + +// This structure is used by ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback as a handle to a registered callback. +struct ProfileHandlerToken { + // Sets the callback and associated arg. 
+ ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg) + : callback(cb), + callback_arg(cb_arg) { + } + + // Callback function to be invoked on receiving a profile timer interrupt. + ProfileHandlerCallback callback; + // Argument for the callback function. + void* callback_arg; +}; + +// This class manages profile timers and associated signal handler. This is a +// a singleton. +class ProfileHandler { + public: + // Registers the current thread with the profile handler. On systems which + // have a separate interval timer for each thread, this function starts the + // timer for the current thread. + // + // The function also attempts to determine whether or not timers are shared by + // all threads in the process. (With LinuxThreads, and with NPTL on some + // Linux kernel versions, each thread has separate timers.) + // + // Prior to determining whether timers are shared, this function will + // unconditionally start the timer. However, if this function determines + // that timers are shared, then it will stop the timer if no callbacks are + // currently registered. + void RegisterThread(); + + // Registers a callback routine to receive profile timer ticks. The returned + // token is to be used when unregistering this callback and must not be + // deleted by the caller. Registration of the first callback enables the + // SIGPROF handler. + ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, + void* callback_arg); + + // Unregisters a previously registered callback. Expects the token returned + // by the corresponding RegisterCallback routine. Unregistering the last + // callback disables the SIGPROF handler. + void UnregisterCallback(ProfileHandlerToken* token); + + // Unregisters all the callbacks, stops the timer if shared, disables the + // SIGPROF handler and clears the timer_sharing_ state. + void Reset(); + + // Gets the current state of profile handler. 
+ void GetState(ProfileHandlerState* state); + + // Initializes and returns the ProfileHandler singleton. + static ProfileHandler* Instance(); + + private: + ProfileHandler(); + ~ProfileHandler(); + + // Largest allowed frequency. + static const int32 kMaxFrequency = 4000; + // Default frequency. + static const int32 kDefaultFrequency = 100; + + // ProfileHandler singleton. + static ProfileHandler* instance_; + + // pthread_once_t for one time initialization of ProfileHandler singleton. + static pthread_once_t once_; + + // Initializes the ProfileHandler singleton via GoogleOnceInit. + static void Init(); + + // Counts the number of SIGPROF interrupts received. + int64 interrupts_ GUARDED_BY(signal_lock_); + + // SIGPROF interrupt frequency, read-only after construction. + int32 frequency_; + + // Counts the number of callbacks registered. + int32 callback_count_ GUARDED_BY(control_lock_); + + // Whether or not the threading system provides interval timers that are + // shared by all threads in a process. + enum { + // No timer initialization attempted yet. + TIMERS_UNTOUCHED, + // First thread has registered and set timer. + TIMERS_ONE_SET, + // Timers are shared by all threads. + TIMERS_SHARED, + // Timers are separate in each thread. + TIMERS_SEPARATE + } timer_sharing_ GUARDED_BY(control_lock_); + + // This lock serializes the registration of threads and protects the + // callbacks_ list below. + // Locking order: + // In the context of a signal handler, acquire signal_lock_ to walk the + // callback list. Otherwise, acquire control_lock_, disable the signal + // handler and then acquire signal_lock_. + SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_); + SpinLock signal_lock_; + + // Holds the list of registered callbacks. We expect the list to be pretty + // small. Currently, the cpu profiler (base/profiler) and thread module + // (base/thread.h) are the only two components registering callbacks. 
+ // Following are the locking requirements for callbacks_: + // For read-write access outside the SIGPROF handler: + // - Acquire control_lock_ + // - Disable SIGPROF handler. + // - Acquire signal_lock_ + // For read-only access in the context of SIGPROF handler + // (Read-write access is *not allowed* in the SIGPROF handler) + // - Acquire signal_lock_ + // For read-only access outside SIGPROF handler: + // - Acquire control_lock_ + typedef list<ProfileHandlerToken*> CallbackList; + typedef CallbackList::iterator CallbackIterator; + CallbackList callbacks_ GUARDED_BY(signal_lock_); + + // Starts the interval timer. If the thread library shares timers between + // threads, this function starts the shared timer. Otherwise, this will start + // the timer in the current thread. + void StartTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Stops the interval timer. If the thread library shares timers between + // threads, this fucntion stops the shared timer. Otherwise, this will stop + // the timer in the current thread. + void StopTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Returns true if the profile interval timer is enabled in the current + // thread. This actually checks the kernel's interval timer setting. (It is + // used to detect whether timers are shared or separate.) + bool IsTimerRunning() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Sets the timer interrupt signal handler. + void EnableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Disables (ignores) the timer interrupt signal. + void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // SIGPROF handler. Iterate over and call all the registered callbacks. 
+ static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); + + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandler); +}; + +ProfileHandler* ProfileHandler::instance_ = NULL; +pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT; + +const int32 ProfileHandler::kMaxFrequency; +const int32 ProfileHandler::kDefaultFrequency; + +// If we are LD_PRELOAD-ed against a non-pthreads app, then +// pthread_once won't be defined. We declare it here, for that +// case (with weak linkage) which will cause the non-definition to +// resolve to NULL. We can then check for NULL or not in Instance. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif +extern "C" int pthread_once(pthread_once_t *, void (*)(void)) + __THROW ATTRIBUTE_WEAK; + +void ProfileHandler::Init() { + instance_ = new ProfileHandler(); +} + +ProfileHandler* ProfileHandler::Instance() { + if (pthread_once) { + pthread_once(&once_, Init); + } + if (instance_ == NULL) { + // This will be true on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + Init(); + assert(instance_ != NULL); + } + return instance_; +} + +ProfileHandler::ProfileHandler() + : interrupts_(0), + callback_count_(0), + timer_sharing_(TIMERS_UNTOUCHED) { + SpinLockHolder cl(&control_lock_); + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("CPUPROFILE_FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + // Ignore signals until we decide to turn profiling on. (Paranoia; + // should already be ignored.) 
+ DisableHandler(); +} + +ProfileHandler::~ProfileHandler() { + Reset(); +} + +void ProfileHandler::RegisterThread() { + SpinLockHolder cl(&control_lock_); + + // We try to detect whether timers are being shared by setting a + // timer in the first call to this function, then checking whether + // it's set in the second call. + // + // Note that this detection method requires that the first two calls + // to RegisterThread must be made from different threads. (Subsequent + // calls will see timer_sharing_ set to either TIMERS_SEPARATE or + // TIMERS_SHARED, and won't try to detect the timer sharing type.) + // + // Also note that if timer settings were inherited across new thread + // creation but *not* shared, this approach wouldn't work. That's + // not an issue for any Linux threading implementation, and should + // not be a problem for a POSIX-compliant threads implementation. + switch (timer_sharing_) { + case TIMERS_UNTOUCHED: + StartTimer(); + timer_sharing_ = TIMERS_ONE_SET; + break; + case TIMERS_ONE_SET: + // If the timer is running, that means that the main thread's + // timer setup is seen in this (second) thread -- and therefore + // that timers are shared. + if (IsTimerRunning()) { + timer_sharing_ = TIMERS_SHARED; + // If callback is already registered, we have to keep the timer + // running. If not, we disable the timer here. + if (callback_count_ == 0) { + StopTimer(); + } + } else { + timer_sharing_ = TIMERS_SEPARATE; + StartTimer(); + } + break; + case TIMERS_SHARED: + // Nothing needed. + break; + case TIMERS_SEPARATE: + StartTimer(); + break; + } +} + +ProfileHandlerToken* ProfileHandler::RegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg); + + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + callbacks_.push_back(token); + } + // Start the timer if timer is shared and this is a first callback. 
+ if ((callback_count_ == 0) && (timer_sharing_ == TIMERS_SHARED)) { + StartTimer(); + } + ++callback_count_; + EnableHandler(); + return token; +} + +void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) { + SpinLockHolder cl(&control_lock_); + for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end(); + ++it) { + if ((*it) == token) { + RAW_CHECK(callback_count_ > 0, "Invalid callback count"); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + delete *it; + callbacks_.erase(it); + } + --callback_count_; + if (callback_count_ > 0) { + EnableHandler(); + } else if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + return; + } + } + // Unknown token. + RAW_LOG(FATAL, "Invalid token"); +} + +void ProfileHandler::Reset() { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + CallbackIterator it = callbacks_.begin(); + while (it != callbacks_.end()) { + CallbackIterator tmp = it; + ++it; + delete *tmp; + callbacks_.erase(tmp); + } + } + callback_count_ = 0; + if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + timer_sharing_ = TIMERS_UNTOUCHED; +} + +void ProfileHandler::GetState(ProfileHandlerState* state) { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); // Protects interrupts_. 
+ state->interrupts = interrupts_; + } + if (callback_count_ > 0) { + EnableHandler(); + } + state->frequency = frequency_; + state->callback_count = callback_count_; +} + +void ProfileHandler::StartTimer() { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1000000 / frequency_; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); +} + +void ProfileHandler::StopTimer() { + struct itimerval timer; + memset(&timer, 0, sizeof timer); + setitimer(ITIMER_PROF, &timer, 0); +} + +bool ProfileHandler::IsTimerRunning() { + struct itimerval current_timer; + RAW_CHECK(0 == getitimer(ITIMER_PROF, ¤t_timer), "getitimer"); + return (current_timer.it_value.tv_sec != 0 || + current_timer.it_value.tv_usec != 0); +} + +void ProfileHandler::EnableHandler() { + struct sigaction sa; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (enable)"); +} + +void ProfileHandler::DisableHandler() { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (disable)"); +} + +void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { + int saved_errno = errno; + RAW_CHECK(instance_ != NULL, "ProfileHandler is not initialized"); + { + SpinLockHolder sl(&instance_->signal_lock_); + ++instance_->interrupts_; + for (CallbackIterator it = instance_->callbacks_.begin(); + it != instance_->callbacks_.end(); + ++it) { + (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg); + } + } + errno = saved_errno; +} + +// The sole purpose of this class is to initialize the ProfileHandler singleton +// when the global static objects are created. Note that the main thread will +// be registered at this time. 
+class ProfileHandlerInitializer { + public: + ProfileHandlerInitializer() { + ProfileHandler::Instance()->RegisterThread(); + } + + private: + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandlerInitializer); +}; +// ProfileHandlerInitializer singleton +static ProfileHandlerInitializer profile_handler_initializer; + +extern "C" void ProfileHandlerRegisterThread() { + ProfileHandler::Instance()->RegisterThread(); +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg); +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { + ProfileHandler::Instance()->UnregisterCallback(token); +} + +extern "C" void ProfileHandlerReset() { + return ProfileHandler::Instance()->Reset(); +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { + ProfileHandler::Instance()->GetState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfileHandlerRegisterThread() { +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return NULL; +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { +} + +extern "C" void ProfileHandlerReset() { +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { +} + +#endif // OS_CYGWIN diff --git a/src/profile-handler.h b/src/profile-handler.h new file mode 100644 index 0000000..1cbe253 --- /dev/null +++ b/src/profile-handler.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Nabeel Mian + * + * This module manages the cpu profile timers and the associated interrupt + * handler. When enabled, all registered threads in the program are profiled. + * (Note: if using linux 2.4 or earlier, you must use the Thread class, in + * google3/thread, to ensure all threads are profiled.) + * + * Any component interested in receiving a profile timer interrupt can do so by + * registering a callback. 
All registered callbacks must be async-signal-safe. + * + * Note: This module requires the sole ownership of ITIMER_PROF timer and the + * SIGPROF signal. + */ + +#ifndef BASE_PROFILE_HANDLER_H_ +#define BASE_PROFILE_HANDLER_H_ + +#include "config.h" +#include <signal.h> +#ifdef COMPILER_MSVC +#include "conflict-signal.h" +#endif +#include "base/basictypes.h" + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declaration. */ +struct ProfileHandlerToken; + +/* + * Callback function to be used with ProfilefHandlerRegisterCallback. This + * function will be called in the context of SIGPROF signal handler and must + * be async-signal-safe. The first three arguments are the values provided by + * the SIGPROF signal handler. We use void* to avoid using ucontext_t on + * non-POSIX systems. + * + * Requirements: + * - Callback must be async-signal-safe. + * - None of the functions in ProfileHandler are async-signal-safe. Therefore, + * callback function *must* not call any of the ProfileHandler functions. + * - Callback is not required to be re-entrant. At most one instance of + * callback can run at a time. + * + * Notes: + * - The SIGPROF signal handler saves and restores errno, so the callback + * doesn't need to. + * - Callback code *must* not acquire lock(s) to serialize access to data shared + * with the code outside the signal handler (callback must be + * async-signal-safe). If such a serialization is needed, follow the model + * used by profiler.cc: + * + * When code other than the signal handler modifies the shared data it must: + * - Acquire lock. + * - Unregister the callback with the ProfileHandler. + * - Modify shared data. + * - Re-register the callback. + * - Release lock. + * and the callback code gets a lockless, read-write access to the data. 
+ */ +typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info, + void* ucontext, void* callback_arg); + +/* + * Registers a new thread with profile handler and should be called only once + * per thread. The main thread is registered at program startup. This routine + * is called by the Thread module in google3/thread whenever a new thread is + * created. This function is not async-signal-safe. + */ +void ProfileHandlerRegisterThread(); + +/* + * Registers a callback routine. This callback function will be called in the + * context of SIGPROF handler, so must be async-signal-safe. The returned token + * is to be used when unregistering this callback via + * ProfileHandlerUnregisterCallback. Registering the first callback enables + * the SIGPROF signal handler. Caller must not free the returned token. This + * function is not async-signal-safe. + */ +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg); + +/* + * Unregisters a previously registered callback. Expects the token returned + * by the corresponding ProfileHandlerRegisterCallback and asserts that the + * passed token is valid. Unregistering the last callback disables the SIGPROF + * signal handler. It waits for the currently running callback to + * complete before returning. This function is not async-signal-safe. + */ +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token); + +/* + * FOR TESTING ONLY + * Unregisters all the callbacks, stops the timers (if shared) and disables the + * SIGPROF handler. All the threads, including the main thread, need to be + * re-registered after this call. This function is not async-signal-safe. + */ +void ProfileHandlerReset(); + +/* + * Stores profile handler's current state. This function is not + * async-signal-safe. 
+ */ +struct ProfileHandlerState { + int32 frequency; /* Profiling frequency */ + int32 callback_count; /* Number of callbacks registered */ + int64 interrupts; /* Number of interrupts received */ +}; +void ProfileHandlerGetState(struct ProfileHandlerState* state); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* BASE_PROFILE_HANDLER_H_ */ diff --git a/src/profiledata.cc b/src/profiledata.cc index e622b28..873100e 100644 --- a/src/profiledata.cc +++ b/src/profiledata.cc @@ -190,13 +190,25 @@ void ProfileData::Stop() { // Dump "/proc/self/maps" so we get list of mapped shared libraries DumpProcSelfMaps(out_); - close(out_); + Reset(); fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", count_, evictions_, total_bytes_); +} + +void ProfileData::Reset() { + if (!enabled()) { + return; + } + + // Don't reset count_, evictions_, or total_bytes_ here. They're used + // by Stop to print information about the profile after reset, and are + // cleared by Start when starting a new profile. + close(out_); delete[] hash_; hash_ = 0; delete[] evict_; evict_ = 0; + num_evicted_ = 0; free(fname_); fname_ = 0; start_time_ = 0; diff --git a/src/profiledata.h b/src/profiledata.h index 008c8a4..29bc1b7 100644 --- a/src/profiledata.h +++ b/src/profiledata.h @@ -60,11 +60,11 @@ // - 'Add' may be called from asynchronous signals, but is not // re-entrant. // -// - None of 'Start', 'Stop', 'Flush', and 'Add' may be called at the -// same time. +// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be +// called at the same time. // -// - 'Start' and 'Stop' should not be called while 'Enabled' or -// 'GetCurrent' are running, and vice versa. +// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled' +// or 'GetCurrent' are running, and vice versa. 
// // A profiler which uses asyncronous signals to add samples will // typically use two locks to protect this data structure: @@ -72,7 +72,7 @@ // - A SpinLock which is held over all calls except for the 'Add' // call made from the signal handler. // -// - A SpinLock which is held over calls to 'Start', 'Stop', +// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset', // 'Flush', and 'Add'. (This SpinLock should be acquired after // the first SpinLock in all cases where both are needed.) class ProfileData { @@ -118,6 +118,10 @@ class ProfileData { // data to disk. void Stop(); + // Stop data collection without writing anything else to disk, and + // discard any collected data. + void Reset(); + // If data collection is enabled, record a sample with 'depth' // entries from 'stack'. (depth must be > 0.) At most // kMaxStackDepth stack entries will be recorded, starting with diff --git a/src/profiler.cc b/src/profiler.cc index 8675348..c51c7b2 100644 --- a/src/profiler.cc +++ b/src/profiler.cc @@ -58,15 +58,13 @@ typedef int ucontext_t; // just to quiet the compiler, mostly #include "base/spinlock.h" #include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */ #include "profiledata.h" +#include "profile-handler.h" #ifdef HAVE_CONFLICT_SIGNAL_H #include "conflict-signal.h" /* used on msvc machines */ #endif using std::string; -DEFINE_string(cpu_profile, "", - "Profile file name (used if CPUPROFILE env var not specified)"); - // Collects up all profile data. This is a singleton, which is // initialized by a constructor at startup. class CpuProfiler { @@ -87,94 +85,40 @@ class CpuProfiler { void GetCurrentState(ProfilerState* state); - // Register the current thread with the profiler. This should be - // called only once per thread. - // - // The profiler attempts to determine whether or not timers are - // shared by all threads in the process. (With LinuxThreads, and - // with NPTL on some Linux kernel versions, each thread has separate - // timers.) 
- // - // On systems which have a separate interval timer for each thread, - // this function starts the timer for the current thread. Profiling - // is disabled by ignoring the resulting signals, and enabled by - // setting their handler to be prof_handler. - // - // Prior to determining whether timers are shared, this function - // will unconditionally start the timer. However, if this function - // determines that timers are shared, then it will stop the timer if - // profiling is not currently enabled. - void RegisterThread(); - static CpuProfiler instance_; private: - static const int kMaxFrequency = 4000; // Largest allowed frequency - static const int kDefaultFrequency = 100; // Default frequency - - // Sample frequency, read-only after construction. - int frequency_; - - // These locks implement the locking requirements described in the - // ProfileData documentation, specifically: - // - // control_lock_ is held all over all collector_ method calls except for - // the 'Add' call made from the signal handler, to protect against - // concurrent use of collector_'s control routines. + // This lock implements the locking requirements described in the ProfileData + // documentation, specifically: // - // signal_lock_ is held over calls to 'Start', 'Stop', 'Flush', and - // 'Add', to protect against concurrent use of data collection and - // writing routines. Code other than the signal handler must disable - // the timer signal while holding signal_lock, to prevent deadlock. - // - // Locking order is control_lock_ first, and then signal_lock_. - // signal_lock_ is acquired by the prof_handler without first - // acquiring control_lock_. - SpinLock control_lock_; - SpinLock signal_lock_; + // lock_ is held all over all collector_ method calls except for the 'Add' + // call made from the signal handler, to protect against concurrent use of + // collector_'s control routines. 
Code other than signal handler must + // unregister the signal handler before calling any collector_ method. + // 'Add' method in the collector is protected by a guarantee from + // ProfileHandle that only one instance of prof_handler can run at a time. + SpinLock lock_; ProfileData collector_; - // Filter function and its argument, if any. (NULL means include - // all samples). Set at start, read-only while running. Written - // while holding both control_lock_ and signal_lock_, read and - // executed under signal_lock_. + // Filter function and its argument, if any. (NULL means include all + // samples). Set at start, read-only while running. Written while holding + // lock_, read and executed in the context of SIGPROF interrupt. int (*filter_)(void*); void* filter_arg_; - // Whether or not the threading system provides interval timers - // that are shared by all threads in a process. - enum { - TIMERS_UNTOUCHED, // No timer initialization attempted yet. - TIMERS_ONE_SET, // First thread has registered and set timer. - TIMERS_SHARED, // Timers are shared by all threads. - TIMERS_SEPARATE // Timers are separate in each thread. - } timer_sharing_; - - // Start the interval timer used for profiling. If the thread - // library shares timers between threads, this is used to enable and - // disable the timer when starting and stopping profiling. If - // timers are not shared, this is used to enable the timer in each - // thread. - void StartTimer(); - - // Stop the interval timer used for profiling. Used only if the - // thread library shares timers between threads. - void StopTimer(); - - // Returns true if the profiling interval timer enabled in the - // current thread. This actually checks the kernel's interval timer - // setting. (It is used to detect whether timers are shared or - // separate.) - bool IsTimerRunning(); - - // Sets the timer interrupt signal handler to one that stores the pc. 
- static void EnableHandler(); - - // Disables (ignores) the timer interrupt signal. - static void DisableHandler(); - - // Signale handler that records the interrupted pc in the profile data - static void prof_handler(int sig, siginfo_t*, void* signal_ucontext); + // Opague token returned by the profile handler. To be used when calling + // ProfileHandlerUnregisterCallback. + ProfileHandlerToken* prof_handler_token_; + + // Sets up a callback to receive SIGPROF interrupt. + void EnableHandler(); + + // Disables receiving SIGPROF interrupt. + void DisableHandler(); + + // Signal handler that records the interrupted pc in the profile data. + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler); }; // Profile data structure singleton: Constructor will check to see if @@ -184,25 +128,10 @@ CpuProfiler CpuProfiler::instance_; // Initialize profiling: activated if getenv("CPUPROFILE") exists. CpuProfiler::CpuProfiler() - : timer_sharing_(TIMERS_UNTOUCHED) { - // Get frequency of interrupts (if specified) - char junk; - const char* fr = getenv("CPUPROFILE_FREQUENCY"); - if (fr != NULL && (sscanf(fr, "%d%c", &frequency_, &junk) == 1) && - (frequency_ > 0)) { - // Limit to kMaxFrequency - frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; - } else { - frequency_ = kDefaultFrequency; - } - - // Ignore signals until we decide to turn profiling on. (Paranoia; - // should already be ignored.) - DisableHandler(); - - RegisterThread(); - - // Should profiling be enabled automatically at start? + : prof_handler_token_(NULL) { + // TODO(cgd) Move this code *out* of the CpuProfile constructor into a + // separate object responsible for initialization. With ProfileHandler there + // is no need to limit the number of profilers. 
char fname[PATH_MAX]; if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { return; @@ -219,41 +148,26 @@ CpuProfiler::CpuProfiler() } } -bool CpuProfiler::Start(const char* fname, - const ProfilerOptions* options) { - SpinLockHolder cl(&control_lock_); +bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) { + SpinLockHolder cl(&lock_); if (collector_.enabled()) { return false; } - { - // spin lock really is needed to protect init here, since it's - // conceivable that prof_handler may still be running from a - // previous profiler run. (For instance, if prof_handler just - // started, had not grabbed the spinlock, then was switched out, - // it might start again right now.) Any such late sample will be - // recorded against the new profile, but there's no harm in that. - SpinLockHolder sl(&signal_lock_); - - ProfileData::Options collector_options; - collector_options.set_frequency(frequency_); - if (!collector_.Start(fname, collector_options)) { - return false; - } - - filter_ = NULL; - if (options != NULL && options->filter_in_thread != NULL) { - filter_ = options->filter_in_thread; - filter_arg_ = options->filter_in_thread_arg; - } - - // Must unlock before setting prof_handler to avoid deadlock - // with signal delivered to this thread. 
+ ProfileHandlerState prof_handler_state; + ProfileHandlerGetState(&prof_handler_state); + + ProfileData::Options collector_options; + collector_options.set_frequency(prof_handler_state.frequency); + if (!collector_.Start(fname, collector_options)) { + return false; } - if (timer_sharing_ == TIMERS_SHARED) { - StartTimer(); + filter_ = NULL; + if (options != NULL && options->filter_in_thread != NULL) { + filter_ = options->filter_in_thread; + filter_arg_ = options->filter_in_thread_arg; } // Setup handler for SIGPROF interrupts @@ -268,55 +182,48 @@ CpuProfiler::~CpuProfiler() { // Stop profiling and write out any collected profile data void CpuProfiler::Stop() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); if (!collector_.enabled()) { return; } - // Ignore timer signals. Note that the handler may have just - // started and might not have taken signal_lock_ yet. Holding - // signal_lock_ below along with the semantics of collector_.Add() - // (which does nothing if collection is not enabled) prevents that - // late sample from causing a problem. + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // stopping the collector. DisableHandler(); - if (timer_sharing_ == TIMERS_SHARED) { - StopTimer(); - } - - { - SpinLockHolder sl(&signal_lock_); - collector_.Stop(); - } + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to stop the collector. + collector_.Stop(); } void CpuProfiler::FlushTable() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); if (!collector_.enabled()) { return; } - // Disable timer signal while holding signal_lock_, to prevent deadlock - // if we take a timer signal while flushing. + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // flushing the profile data. 
DisableHandler(); - { - SpinLockHolder sl(&signal_lock_); - collector_.FlushTable(); - } + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to flush the profile data. + collector_.FlushTable(); + EnableHandler(); } bool CpuProfiler::Enabled() { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); return collector_.enabled(); } void CpuProfiler::GetCurrentState(ProfilerState* state) { ProfileData::State collector_state; { - SpinLockHolder cl(&control_lock_); + SpinLockHolder cl(&lock_); collector_.GetCurrentState(&collector_state); } @@ -328,141 +235,56 @@ void CpuProfiler::GetCurrentState(ProfilerState* state) { state->profile_name[buf_size-1] = '\0'; } -void CpuProfiler::RegisterThread() { - SpinLockHolder cl(&control_lock_); - - // We try to detect whether timers are being shared by setting a - // timer in the first call to this function, then checking whether - // it's set in the second call. - // - // Note that this detection method requires that the first two calls - // to RegisterThread must be made from different threads. (Subsequent - // calls will see timer_sharing_ set to either TIMERS_SEPARATE or - // TIMERS_SHARED, and won't try to detect the timer sharing type.) - // - // Also note that if timer settings were inherited across new thread - // creation but *not* shared, this approach wouldn't work. That's - // not an issue for any Linux threading implementation, and should - // not be a problem for a POSIX-compliant threads implementation. - switch (timer_sharing_) { - case TIMERS_UNTOUCHED: - StartTimer(); - timer_sharing_ = TIMERS_ONE_SET; - break; - case TIMERS_ONE_SET: - // If the timer is running, that means that the main thread's - // timer setup is seen in this (second) thread -- and therefore - // that timers are shared. 
- if (IsTimerRunning()) {
- timer_sharing_ = TIMERS_SHARED;
- // If profiling has already been enabled, we have to keep the
- // timer running. If not, we disable the timer here and
- // re-enable it in start.
- if (!collector_.enabled()) {
- StopTimer();
- }
- } else {
- timer_sharing_ = TIMERS_SEPARATE;
- StartTimer();
- }
- break;
- case TIMERS_SHARED:
- // Nothing needed.
- break;
- case TIMERS_SEPARATE:
- StartTimer();
- break;
- }
-}
-
-void CpuProfiler::StartTimer() {
- // TODO: Randomize the initial interrupt value?
- // TODO: Randomize the inter-interrupt period on every interrupt?
- struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 1000000 / frequency_;
- timer.it_value = timer.it_interval;
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-void CpuProfiler::StopTimer() {
- struct itimerval timer;
- memset(&timer, 0, sizeof timer);
- setitimer(ITIMER_PROF, &timer, 0);
-}
-
-bool CpuProfiler::IsTimerRunning() {
- itimerval current_timer;
- RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer failed");
- return (current_timer.it_value.tv_sec != 0 ||
- current_timer.it_value.tv_usec != 0);
-}
-
 void CpuProfiler::EnableHandler() {
- struct sigaction sa;
- sa.sa_sigaction = prof_handler;
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered");
+ prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this);
+ RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler");
 }
 
 void CpuProfiler::DisableHandler() {
- struct sigaction sa;
- sa.sa_handler = SIG_IGN;
- sa.sa_flags = SA_RESTART;
- sigemptyset(&sa.sa_mask);
- RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigaction failed");
+ RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered");
+ ProfileHandlerUnregisterCallback(prof_handler_token_);
+ prof_handler_token_ = NULL;
} -// Signal handler that records the pc in the profile-data structure -// -// NOTE: it is possible for profiling to be disabled just as this -// signal handler starts, before signal_lock_ is acquired. Therefore, -// collector_.Add must check whether profiling is enabled before -// trying to record any data. (See also comments in Start and Stop.) -void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext) { - int saved_errno = errno; - - // Hold the spin lock while we're gathering the trace because there's - // no real harm in holding it and there's little point in releasing - // and re-acquiring it. (We'll only be blocking Start, Stop, and - // Flush.) We make sure to release it before restoring errno. - { - SpinLockHolder sl(&instance_.signal_lock_); - - if (instance_.filter_ == NULL || - (*instance_.filter_)(instance_.filter_arg_)) { - void* stack[ProfileData::kMaxStackDepth]; - - // The top-most active routine doesn't show up as a normal - // frame, but as the "pc" value in the signal handler context. - stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); - - // We skip the top two stack trace entries (this function and one - // signal handler frame) since they are artifacts of profiling and - // should not be measured. Other profiling related frames may be - // removed by "pprof" at analysis time. Instead of skipping the top - // frames, we could skip nothing, but that would increase the - // profile size unnecessarily. - int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, - 2, signal_ucontext); - depth++; // To account for pc value in stack[0]; - - instance_.collector_.Add(depth, stack); - } +// Signal handler that records the pc in the profile-data structure. We do no +// synchronization here. profile-handler.cc guarantees that at most one +// instance of prof_handler() will run at a time. 
All other routines that +// access the data touched by prof_handler() disable this signal handler before +// accessing the data and therefore cannot execute concurrently with +// prof_handler(). +void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler) { + CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler); + + if (instance->filter_ == NULL || + (*instance->filter_)(instance->filter_arg_)) { + void* stack[ProfileData::kMaxStackDepth]; + + // The top-most active routine doesn't show up as a normal + // frame, but as the "pc" value in the signal handler context. + stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + + // We skip the top two stack trace entries (this function and one + // signal handler frame) since they are artifacts of profiling and + // should not be measured. Other profiling related frames may be + // removed by "pprof" at analysis time. Instead of skipping the top + // frames, we could skip nothing, but that would increase the + // profile size unnecessarily. + int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, + 2, signal_ucontext); + depth++; // To account for pc value in stack[0]; + + instance->collector_.Add(depth, stack); } - - errno = saved_errno; } +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + extern "C" void ProfilerRegisterThread() { - CpuProfiler::instance_.RegisterThread(); + ProfileHandlerRegisterThread(); } -// DEPRECATED routines -extern "C" void ProfilerEnable() { } -extern "C" void ProfilerDisable() { } - extern "C" void ProfilerFlush() { CpuProfiler::instance_.FlushTable(); } @@ -488,9 +310,27 @@ extern "C" void ProfilerGetCurrentState(ProfilerState* state) { CpuProfiler::instance_.GetCurrentState(state); } +#else // OS_CYGWIN -REGISTER_MODULE_INITIALIZER(profiler, { - if (!FLAGS_cpu_profile.empty()) { - ProfilerStart(FLAGS_cpu_profile.c_str()); - } -}); +// ITIMER_PROF doesn't work under cygwin. 
ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfilerRegisterThread() { } +extern "C" void ProfilerFlush() { } +extern "C" int ProfilingIsEnabledForAllThreads() { return 0; } +extern "C" int ProfilerStart(const char* fname) { return 0; } +extern "C" int ProfilerStartWithOptions(const char *fname, + const ProfilerOptions *options) { + return 0; +} +extern "C" void ProfilerStop() { } +extern "C" void ProfilerGetCurrentState(ProfilerState* state) { + memset(state, 0, sizeof(*state)); +} + +#endif // OS_CYGWIN + +// DEPRECATED routines +extern "C" void ProfilerEnable() { } +extern "C" void ProfilerDisable() { } diff --git a/src/sampler.cc b/src/sampler.cc index dda225c..a11b893 100755 --- a/src/sampler.cc +++ b/src/sampler.cc @@ -39,7 +39,7 @@ using std::min; -// Twice the approximate gap between sampling actions. +// The approximate gap in bytes between sampling actions. // I.e., we take one sample approximately once every // tcmalloc_sample_parameter bytes of allocation // i.e. about once every 512KB. @@ -59,9 +59,9 @@ namespace tcmalloc { // Statics for Sampler double Sampler::log_table_[1<<kFastlogNumBits]; -// Populate the lookup table for FastLog2 -// The approximates the log2 curve with a step function -// Steps have height equal to log2 of the mid-point of the step +// Populate the lookup table for FastLog2. +// This approximates the log2 curve with a step function. +// Steps have height equal to log2 of the mid-point of the step. 
void Sampler::PopulateFastLog2Table() { for (int i = 0; i < (1<<kFastlogNumBits); i++) { log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits)) @@ -101,32 +101,30 @@ void Sampler::InitStatics() { // This is done by generating a random number between 0 and 1 and applying // the inverse cumulative distribution function for an exponential. // Specifically: Let m be the inverse of the sample period, then -// p = 1 - exp(mx) -// q = exp(mx) -// log_e(q) = mx -// log_e(q)/m = x -// log_2(q) / (log_e(2) / m) = x -// The value (log_e(2) / m) is precomputed -// and may also be approximated for large sampler periods by -// 1.0 / log2(1.0-1.0/(sample_period_)); -// In the code, q is actually in the range 1 to 2**26, hence the -26 +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below size_t Sampler::PickNextSamplingPoint() { - double sample_scaling = - log(2.0) * FLAGS_tcmalloc_sample_parameter; rnd_ = NextRandom(rnd_); // Take the top 26 bits as the random number - // (This plus the 1<<26 sampling bound give a max step possible of - // 1209424308 bytes.) + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) const uint64_t prng_mod_power = 48; // Number of bits in prng // The uint32_t cast is to prevent a (hard-to-reproduce) NAN // under piii debug for some binaries. double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0; - // Put the computed p-value through the CDF of a geometric + // Put the computed p-value through the CDF of a geometric. 
// For faster performance (save ~1/20th exec time), replace - // min(FastLog2(q) - 26,0) by (Fastlog2(q) - 26.000705) + // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705) // The value 26.000705 is used rather than 26 to compensate // for inaccuracies in FastLog2 which otherwise result in a // negative answer. - return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * sample_scaling + 1); + return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0) + * FLAGS_tcmalloc_sample_parameter) + 1); } } // namespace tcmalloc diff --git a/src/stacktrace_win32-inl.h b/src/stacktrace_win32-inl.h index a717714..26ae297 100644 --- a/src/stacktrace_win32-inl.h +++ b/src/stacktrace_win32-inl.h @@ -62,7 +62,7 @@ typedef USHORT NTAPI RtlCaptureStackBackTrace_Function( // to worry about someone else holding the loader's lock. static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = (RtlCaptureStackBackTrace_Function*) - GetProcAddress(GetModuleHandle("ntdll.dll"), "RtlCaptureStackBackTrace"); + GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace"); int GetStackTrace(void** result, int max_depth, int skip_count) { if (!RtlCaptureStackBackTrace_fn) { diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h index 902806d..9f68a03 100644 --- a/src/stacktrace_x86-inl.h +++ b/src/stacktrace_x86-inl.h @@ -243,7 +243,7 @@ static void **NextStackFrame(void **old_sp, const void *uc) { // last two pages in the address space if ((uintptr_t)new_sp >= 0xffffe000) return NULL; #endif -#if !defined(_WIN32) +#ifdef HAVE_MMAP if (!STRICT_UNWINDING) { // Lax sanity checks cause a crash on AMD-based machines with // VDSO-enabled kernels. 
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index ca88b91..e5022e3 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -131,6 +131,10 @@ #include "tcmalloc_guard.h" #include "thread_cache.h" +#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) +# define WIN32_DO_PATCHING 1 +#endif + using tcmalloc::PageHeap; using tcmalloc::PageHeapAllocator; using tcmalloc::SizeMap; @@ -171,7 +175,8 @@ DEFINE_int64(tcmalloc_large_alloc_report_threshold, "messages. This bounds the amount of extra logging " "generated by this flag. Default value of this flag " "is very large and therefore you should see no extra " - "logging unless the flag is overridden."); + "logging unless the flag is overridden. Set to 0 to " + "disable reporting entirely."); // These routines are called by free(), realloc(), etc. if the pointer is // invalid. This is a cheap (source-editing required) kind of exception @@ -181,12 +186,6 @@ void InvalidFree(void* ptr) { CRASH("Attempt to free invalid pointer: %p\n", ptr); } -void* InvalidRealloc(void* old_ptr, size_t new_size) { - CRASH("Attempt to realloc invalid pointer: %p (realloc to %" PRIuS ")\n", - old_ptr, new_size); - return NULL; -} - size_t InvalidGetSizeForRealloc(void* old_ptr) { CRASH("Attempt to realloc invalid pointer: %p\n", old_ptr); return 0; @@ -492,7 +491,8 @@ TCMallocGuard::TCMallocGuard() { // Check whether the kernel also supports TLS (needs to happen at runtime) tcmalloc::CheckIfKernelSupportsTLS(); #endif -#ifdef _WIN32 // patch the windows VirtualAlloc, etc. +#ifdef WIN32_DO_PATCHING + // patch the windows VirtualAlloc, etc. 
PatchWindowsFunctions(); // defined in windows/patch_functions.cc #endif free(malloc(1)); @@ -512,7 +512,9 @@ TCMallocGuard::~TCMallocGuard() { } } } +#ifndef WIN32_OVERRIDE_ALLOCATORS static TCMallocGuard module_enter_exit_hook; +#endif //------------------------------------------------------------------- // Helpers for the exported routines below @@ -578,8 +580,8 @@ static void ReportLargeAlloc(Length num_pages, void* result) { static const int N = 1000; char buffer[N]; TCMalloc_Printer printer(buffer, N); - printer.printf("tcmalloc: large alloc %lld bytes == %p @ ", - static_cast<long long>(num_pages) << kPageShift, + printer.printf("tcmalloc: large alloc %llu bytes == %p @ ", + static_cast<unsigned long long>(num_pages) << kPageShift, result); for (int i = 0; i < stack.depth; i++) { printer.printf(" %p", stack.stack[i]); @@ -598,7 +600,7 @@ inline void* do_malloc_pages(Length num_pages) { SpinLockHolder h(Static::pageheap_lock()); span = Static::pageheap()->New(num_pages); const int64 threshold = large_alloc_threshold; - if (num_pages >= (threshold >> kPageShift)) { + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { // Increase the threshold by 1/8 every time we generate a report. // We cap the threshold at 8GB to avoid overflow problems. large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 @@ -716,7 +718,7 @@ inline size_t GetSizeWithCallback(void* ptr, return Static::sizemap()->ByteSizeForClass(cl); } else { Span *span = Static::pageheap()->GetDescriptor(p); - if (span == NULL) { // means we do now own this memory + if (span == NULL) { // means we do not own this memory return (*invalid_getsize_fn)(ptr); } else if (span->sizeclass != 0) { Static::pageheap()->CacheSizeClass(p, span->sizeclass); @@ -729,12 +731,12 @@ inline size_t GetSizeWithCallback(void* ptr, // This lets you call back to a given function pointer if ptr is invalid. // It is used primarily by windows code which wants a specialized callback. 
-inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, - void* (*invalid_realloc_fn)(void*, - size_t)) { +inline void* do_realloc_with_callback( + void* old_ptr, size_t new_size, + void (*invalid_free_fn)(void*), + size_t (*invalid_get_size_fn)(void*)) { // Get the size of the old entry - const size_t old_size = GetSizeWithCallback(old_ptr, - &InvalidGetSizeForRealloc); + const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); // Reallocate if the new size is larger than the old size, // or if the new size is significantly smaller than the old size. @@ -764,7 +766,7 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, // We could use a variant of do_free() that leverages the fact // that we already know the sizeclass of old_ptr. The benefit // would be small, so don't bother. - do_free(old_ptr); + do_free_with_callback(old_ptr, invalid_free_fn); return new_ptr; } else { // We still need to call hooks to report the updated size: @@ -775,7 +777,8 @@ inline void* do_realloc_with_callback(void* old_ptr, size_t new_size, } inline void* do_realloc(void* old_ptr, size_t new_size) { - return do_realloc_with_callback(old_ptr, new_size, &InvalidRealloc); + return do_realloc_with_callback(old_ptr, new_size, + &InvalidFree, &InvalidGetSizeForRealloc); } // For use by exported routines below that want specific alignments @@ -893,7 +896,7 @@ static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); inline void* cpp_alloc(size_t size, bool nothrow) { for (;;) { void* p = do_malloc(size); -#ifdef PREANSINEW +#if defined(PREANSINEW) || (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) return p; #else if (p == NULL) { // allocation failed @@ -939,7 +942,7 @@ size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { // Exported routines //------------------------------------------------------------------- -#ifndef _WIN32 // windows doesn't allow overriding; use the do_* 
fns instead +#ifndef WIN32_DO_PATCHING // CAVEAT: The code structure below ensures that MallocHook methods are always // called from the stack frame of the invoked allocation function. @@ -1198,4 +1201,4 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; -#endif // #ifndef _WIN32 +#endif // #ifndef WIN32_DO_PATCHING diff --git a/src/tests/frag_unittest.cc b/src/tests/frag_unittest.cc index c257c7d..08494b4 100644 --- a/src/tests/frag_unittest.cc +++ b/src/tests/frag_unittest.cc @@ -35,10 +35,13 @@ #include "config_for_unittests.h" #include <stdlib.h> #include <stdio.h> -#ifndef _WIN32 +#ifdef HAVE_SYS_RESOURCE_H #include <sys/time.h> // for struct timeval #include <sys/resource.h> // for getrusage #endif +#ifdef _WIN32 +#include <windows.h> // for GetTickCount() +#endif #include <vector> #include "base/logging.h" #include <google/malloc_extension.h> @@ -80,12 +83,14 @@ int main(int argc, char** argv) { // Now do timing tests for (int i = 0; i < 5; i++) { static const int kIterations = 100000; -#ifdef _WIN32 - long long int tv_start = GetTickCount(); -#else +#ifdef HAVE_SYS_RESOURCE_H struct rusage r; getrusage(RUSAGE_SELF, &r); // figure out user-time spent on this struct timeval tv_start = r.ru_utime; +#elif defined(_WIN32) + long long int tv_start = GetTickCount(); +#else +# error No way to calculate time on your system #endif for (int i = 0; i < kIterations; i++) { @@ -94,16 +99,18 @@ int main(int argc, char** argv) { &s); } -#ifdef _WIN32 +#ifdef HAVE_SYS_RESOURCE_H + getrusage(RUSAGE_SELF, &r); + struct timeval tv_end = r.ru_utime; + int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec; + int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec; +#elif defined(_WIN32) long long int tv_end = GetTickCount(); int64 sumsec = (tv_end - tv_start) / 1000; // Resolution in windows is only to the millisecond, alas int64 sumusec = ((tv_end 
- tv_start) % 1000) * 1000; #else - getrusage(RUSAGE_SELF, &r); - struct timeval tv_end = r.ru_utime; - int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec; - int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec; +# error No way to calculate time on your system #endif fprintf(stderr, "getproperty: %6.1f ns/call\n", (sumsec * 1e9 + sumusec * 1e3) / kIterations); diff --git a/src/tests/profile-handler_unittest.cc b/src/tests/profile-handler_unittest.cc new file mode 100644 index 0000000..d780aac --- /dev/null +++ b/src/tests/profile-handler_unittest.cc @@ -0,0 +1,445 @@ +// Copyright 2009 Google Inc. All Rights Reserved. +// Author: Nabeel Mian (nabeelmian@google.com) +// Chris Demetriou (cgd@google.com) +// +// This file contains the unit tests for profile-handler.h interface. + +#include "config.h" +#include "profile-handler.h" + +#include <assert.h> +#include <sys/time.h> +#include <pthread.h> +#include "base/logging.h" +#include "base/simple_mutex.h" + +// Some helpful macros for the test class +#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_EQ(a, b) CHECK_EQ(a, b) +#define EXPECT_NE(a, b) CHECK_NE(a, b) +#define EXPECT_GT(a, b) CHECK_GT(a, b) +#define EXPECT_LT(a, b) CHECK_LT(a, b) +#define EXPECT_GE(a, b) CHECK_GE(a, b) +#define EXPECT_LE(a, b) CHECK_LE(a, b) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define TEST_F(cls, fn) void cls :: fn() + +namespace { + +// TODO(csilvers): error-checking on the pthreads routines +class Thread { + public: + Thread() : joinable_(false) { } + void SetJoinable(bool value) { joinable_ = value; } + void Start() { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, joinable_ ? 
PTHREAD_CREATE_JOINABLE
+ : PTHREAD_CREATE_DETACHED);
+ pthread_create(&thread_, &attr, &DoRun, this);
+ pthread_attr_destroy(&attr);
+ }
+ void Join() {
+ assert(joinable_);
+ pthread_join(thread_, NULL);
+ }
+ virtual void Run() = 0;
+ private:
+ static void* DoRun(void* cls) {
+ ProfileHandlerRegisterThread();
+ reinterpret_cast<Thread*>(cls)->Run();
+ return NULL;
+ }
+ pthread_t thread_;
+ bool joinable_;
+};
+
+// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy
+// load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz)
+// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate
+// enough cpu usage to get a profile tick.
+int kSleepInterval = 200000;
+
+// Whether each thread has separate timers.
+static bool timer_separate_ = false;
+
+// Checks whether the profile timer is enabled for the current thread.
+bool IsTimerEnabled() {
+ itimerval current_timer;
+ EXPECT_EQ(0, getitimer(ITIMER_PROF, &current_timer));
+ return (current_timer.it_value.tv_sec != 0 ||
+ current_timer.it_value.tv_usec != 0);
+}
+
+class VirtualTimerGetterThread : public Thread {
+ public:
+ VirtualTimerGetterThread() {
+ memset(&virtual_timer_, 0, sizeof virtual_timer_);
+ }
+ struct itimerval virtual_timer_;
+
+ private:
+ void Run() {
+ CHECK_EQ(0, getitimer(ITIMER_VIRTUAL, &virtual_timer_));
+ }
+};
+
+// This function checks whether the timers are shared between thread. This
+// function spawns a thread, so use it carefully when testing thread-dependent
+// behaviour.
+static bool threads_have_separate_timers() {
+ struct itimerval new_timer_val;
+
+ // Enable the virtual timer in the current thread.
+ memset(&new_timer_val, 0, sizeof new_timer_val);
+ new_timer_val.it_value.tv_sec = 1000000; // seconds
+ CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL));
+
+ // Spawn a thread, get the virtual timer's value there.
+ VirtualTimerGetterThread thread; + thread.SetJoinable(true); + thread.Start(); + thread.Join(); + + // Disable timer here. + memset(&new_timer_val, 0, sizeof new_timer_val); + CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL)); + + bool target_timer_enabled = (thread.virtual_timer_.it_value.tv_sec != 0 || + thread.virtual_timer_.it_value.tv_usec != 0); + if (!target_timer_enabled) { + LOG(INFO, "threads have separate timers"); + return true; + } else { + LOG(INFO, "threads have shared timers"); + return false; + } +} + +// Dummy worker thread to accumulate cpu time. +class BusyThread : public Thread { + public: + BusyThread() : stop_work_(false) { + } + + // Setter/Getters + bool stop_work() { + MutexLock lock(&mu_); + return stop_work_; + } + void set_stop_work(bool stop_work) { + MutexLock lock(&mu_); + stop_work_ = stop_work; + } + + private: + // Protects stop_work_ below. + Mutex mu_; + // Whether to stop work? + bool stop_work_; + + // Do work until asked to stop. + void Run() { + while (!stop_work()) { + } + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +class NullThread : public Thread { + private: + void Run() { + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +// Signal handler which tracks the profile timer ticks. +static void TickCounter(int sig, siginfo_t* sig_info, void *vuc, + void* tick_counter) { + int* counter = static_cast<int*>(tick_counter); + ++(*counter); +} + +// This class tests the profile-handler.h interface. +class ProfileHandlerTest { + protected: + + // Determines whether threads have separate timers. + static void SetUpTestCase() { + timer_separate_ = threads_have_separate_timers(); + } + + // Sets up the profile timers and SIGPROF handler in a known state. It does + // the following: + // 1. 
Unregisters all the callbacks, stops the timer (if shared) and + // clears out timer_sharing state in the ProfileHandler. This clears + // out any state left behind by the previous test or during module + // initialization when the test program was started. + // 2. Spawns two threads which will be registered with the ProfileHandler. + // At this time ProfileHandler knows if the timers are shared. + // 3. Starts a busy worker thread to accumulate CPU usage. + virtual void SetUp() { + // Reset the state of ProfileHandler between each test. This unregisters + // all callbacks, stops timer (if shared) and clears timer sharing state. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + // ProfileHandler requires at least two threads to be registerd to determine + // whether timers are shared. + RegisterThread(); + RegisterThread(); + // Now that two threads are started, verify that the signal handler is + // disabled and the timers are correctly enabled/disabled. + VerifyDisabled(); + // Start worker to accumulate cpu usage. + StartWorker(); + } + + virtual void TearDown() { + ProfileHandlerReset(); + // Stops the worker thread. + StopWorker(); + } + + // Starts a no-op thread that gets registered with the ProfileHandler. Waits + // for the thread to stop. + void RegisterThread() { + NullThread t; + t.SetJoinable(true); + t.Start(); + t.Join(); + } + + // Starts a busy worker thread to accumulate cpu time. There should be only + // one busy worker running. This is required for the case where there are + // separate timers for each thread. + void StartWorker() { + busy_worker_ = new BusyThread(); + busy_worker_->SetJoinable(true); + busy_worker_->Start(); + // Wait for worker to start up and register with the ProfileHandler. + // TODO(nabeelmian) This may not work under very heavy load. + usleep(kSleepInterval); + } + + // Stops the worker thread. 
+ void StopWorker() { + busy_worker_->set_stop_work(true); + busy_worker_->Join(); + delete busy_worker_; + } + + // Checks whether SIGPROF signal handler is enabled. + bool IsSignalEnabled() { + struct sigaction sa; + CHECK_EQ(sigaction(SIGPROF, NULL, &sa), 0); + return ((sa.sa_handler == SIG_IGN) || (sa.sa_handler == SIG_DFL)) ? + false : true; + } + + // Gets the number of callbacks registered with the ProfileHandler. + uint32 GetCallbackCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.callback_count; + } + + // Gets the current ProfileHandler interrupt count. + uint64 GetInterruptCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.interrupts; + } + + // Verifies that a callback is correctly registered and receiving + // profile ticks. + void VerifyRegistration(const int& tick_counter) { + // Check the callback count. + EXPECT_GT(GetCallbackCount(), 0); + // Check that the profile timer is enabled. + EXPECT_TRUE(IsTimerEnabled()); + // Check that the signal handler is enabled. + EXPECT_TRUE(IsSignalEnabled()); + uint64 interrupts_before = GetInterruptCount(); + // Sleep for a bit and check that tick counter is making progress. + int old_tick_count = tick_counter; + usleep(kSleepInterval); + int new_tick_count = tick_counter; + EXPECT_GT(new_tick_count, old_tick_count); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_GT(interrupts_after, interrupts_before); + } + + // Verifies that a callback is not receiving profile ticks. + void VerifyUnregistration(const int& tick_counter) { + // Sleep for a bit and check that tick counter is not making progress. + int old_tick_count = tick_counter; + usleep(kSleepInterval); + int new_tick_count = tick_counter; + EXPECT_EQ(new_tick_count, old_tick_count); + // If no callbacks, signal handler and shared timer should be disabled. 
+ if (GetCallbackCount() == 0) { + EXPECT_FALSE(IsSignalEnabled()); + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + } + } + + // Verifies that the SIGPROF interrupt handler is disabled and the timer, + // if shared, is disabled. Expects the worker to be running. + void VerifyDisabled() { + // Check that the signal handler is disabled. + EXPECT_FALSE(IsSignalEnabled()); + // Check that the callback count is 0. + EXPECT_EQ(GetCallbackCount(), 0); + // Check that the timer is disabled if shared, enabled otherwise. + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + // Verify that the ProfileHandler is not accumulating profile ticks. + uint64 interrupts_before = GetInterruptCount(); + usleep(kSleepInterval); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_EQ(interrupts_after, interrupts_before); + } + + // Busy worker thread to accumulate cpu usage. + BusyThread* busy_worker_; + + private: + // The tests to run + void RegisterUnregisterCallback(); + void MultipleCallbacks(); + void Reset(); + void RegisterCallbackBeforeThread(); + + public: +#define RUN(test) do { \ + printf("Running %s\n", #test); \ + ProfileHandlerTest pht; \ + pht.SetUp(); \ + pht.test(); \ + pht.TearDown(); \ +} while (0) + + static int RUN_ALL_TESTS() { + SetUpTestCase(); + RUN(RegisterUnregisterCallback); + RUN(MultipleCallbacks); + RUN(Reset); + RUN(RegisterCallbackBeforeThread); + printf("Done\n"); + return 0; + } +}; + +// Verifies ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback. +TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) { + int tick_count = 0; + ProfileHandlerToken* token = ProfileHandlerRegisterCallback( + TickCounter, &tick_count); + VerifyRegistration(tick_count); + ProfileHandlerUnregisterCallback(token); + VerifyUnregistration(tick_count); +} + +// Verifies that multiple callbacks can be registered. 
+TEST_F(ProfileHandlerTest, MultipleCallbacks) { + // Register first callback. + int first_tick_count; + ProfileHandlerToken* token1 = ProfileHandlerRegisterCallback( + TickCounter, &first_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. + int second_tick_count; + ProfileHandlerToken* token2 = ProfileHandlerRegisterCallback( + TickCounter, &second_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Unregister first callback. + ProfileHandlerUnregisterCallback(token1); + VerifyUnregistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + // Verify that second callback is still registered. + VerifyRegistration(second_tick_count); + + // Unregister second callback. + ProfileHandlerUnregisterCallback(token2); + VerifyUnregistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 0); + + // Verify that the signal handler and timers are correctly disabled. + VerifyDisabled(); +} + +// Verifies ProfileHandlerReset +TEST_F(ProfileHandlerTest, Reset) { + // Verify that the profile timer interrupt is disabled. + VerifyDisabled(); + int first_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &first_tick_count); + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. + int second_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &second_tick_count); + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Reset the profile handler and verify that callback were correctly + // unregistered and timer/signal are disabled. 
+ ProfileHandlerReset(); + VerifyUnregistration(first_tick_count); + VerifyUnregistration(second_tick_count); + VerifyDisabled(); +} + +// Verifies that ProfileHandler correctly handles a case where a callback was +// registered before the second thread started. +TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { + // Stop the worker. + StopWorker(); + // Unregister all existing callbacks, stop the timer (if shared), disable + // the signal handler and reset the timer sharing state in the Profile + // Handler. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + + // Start the worker. At this time ProfileHandler doesn't know if timers are + // shared as only one thread has registered so far. + StartWorker(); + // Register a callback and check that profile ticks are being delivered. + int tick_count; + ProfileHandlerRegisterCallback(TickCounter, &tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + VerifyRegistration(tick_count); + + // Register a second thread and verify that timer and signal handler are + // correctly enabled. 
+ RegisterThread(); + EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_TRUE(IsTimerEnabled()); + EXPECT_TRUE(IsSignalEnabled()); +} + +} // namespace + +int main(int argc, char** argv) { + return ProfileHandlerTest::RUN_ALL_TESTS(); +} diff --git a/src/tests/profiledata_unittest.cc b/src/tests/profiledata_unittest.cc index 679b9e2..31ba3b6 100644 --- a/src/tests/profiledata_unittest.cc +++ b/src/tests/profiledata_unittest.cc @@ -54,6 +54,7 @@ using std::string; #define EXPECT_TRUE(cond) CHECK(cond) #define EXPECT_FALSE(cond) CHECK(!(cond)) #define EXPECT_EQ(a, b) CHECK_EQ(a, b) +#define EXPECT_NE(a, b) CHECK_NE(a, b) #define EXPECT_GT(a, b) CHECK_GT(a, b) #define EXPECT_LT(a, b) CHECK_LT(a, b) #define EXPECT_GE(a, b) CHECK_GE(a, b) @@ -64,9 +65,41 @@ using std::string; namespace { +template<typename T> class scoped_array { + public: + scoped_array(T* data) : data_(data) { } + ~scoped_array() { delete[] data_; } + T* get() { return data_; } + T& operator[](int i) { return data_[i]; } + private: + T* const data_; +}; + // Re-runs fn until it doesn't cause EINTR. #define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) +// Read up to "count" bytes from file descriptor "fd" into the buffer +// starting at "buf" while handling short reads and EINTR. On +// success, return the number of bytes read. Otherwise, return -1. +static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { + CHECK_GE(fd, 0); + char *buf0 = reinterpret_cast<char *>(buf); + ssize_t num_bytes = 0; + while (num_bytes < count) { + ssize_t len; + NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); + if (len < 0) { // There was an error other than EINTR. + return -1; + } + if (len == 0) { // Reached EOF. + break; + } + num_bytes += len; + } + CHECK(num_bytes <= count); + return num_bytes; +} + // Thin wrapper around a file descriptor so that the file descriptor // gets closed for sure. struct FileDescriptor { @@ -87,6 +120,9 @@ typedef uintptr_t ProfileDataSlot; // sample. 
inline void* V(intptr_t x) { return reinterpret_cast<void*>(x); } +// String returned by ProfileDataChecker helper functions to indicate success. +const char kNoError[] = ""; + class ProfileDataChecker { public: ProfileDataChecker() { @@ -99,35 +135,194 @@ class ProfileDataChecker { string filename() const { return filename_; } - void Check(const ProfileDataSlot* slots, int num_slots) { - CheckWithSkips(slots, num_slots, NULL, 0); + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots'. Returns kNoError if the + // data matched, otherwise returns an indication of the cause of the + // mismatch. + string Check(const ProfileDataSlot* slots, int num_slots) { + return CheckWithSkips(slots, num_slots, NULL, 0); } - void CheckWithSkips(const ProfileDataSlot* slots, int num_slots, - const int* skips, int num_skips) { - FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); - CHECK_GE(fd.get(), 0); - - ProfileDataSlot* filedata = new ProfileDataSlot[num_slots]; - size_t expected_bytes = num_slots * sizeof filedata[0]; - ssize_t bytes_read = read(fd.get(), filedata, expected_bytes); - CHECK_EQ(expected_bytes, bytes_read); - - for (int i = 0; i < num_slots; i++) { - if (num_skips > 0 && *skips == i) { - num_skips--; - skips++; - continue; - } - CHECK_EQ(slots[i], filedata[i]); // "first mismatch at slot " << i; - } - delete[] filedata; - } + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots', skipping over entries + // described by 'skips' and 'num_skips'. + // + // 'skips' must be a sorted list of (0-based) slot numbers to be + // skipped, of length 'num_skips'. Note that 'num_slots' includes + // any skipped slots, i.e., the first 'num_slots' profile data slots + // will be considered, but some may be skipped. + // + // Returns kNoError if the data matched, otherwise returns an + // indication of the cause of the mismatch. 
+ string CheckWithSkips(const ProfileDataSlot* slots, int num_slots, + const int* skips, int num_skips); + + // Validate that a profile is correctly formed. The profile is + // assumed to have been created by the same kind of binary (e.g., + // same slot size, same endian, etc.) as is validating the profile. + // + // Returns kNoError if the profile appears valid, otherwise returns + // an indication of the problem with the profile. + string ValidateProfile(); private: string filename_; }; +string ProfileDataChecker::CheckWithSkips(const ProfileDataSlot* slots, + int num_slots, const int* skips, + int num_skips) { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + scoped_array<ProfileDataSlot> filedata(new ProfileDataSlot[num_slots]); + size_t expected_bytes = num_slots * sizeof filedata[0]; + ssize_t bytes_read = ReadPersistent(fd.get(), filedata.get(), expected_bytes); + if (expected_bytes != bytes_read) + return "file too small"; + + for (int i = 0; i < num_slots; i++) { + if (num_skips > 0 && *skips == i) { + num_skips--; + skips++; + continue; + } + if (slots[i] != filedata[i]) + return "data mismatch"; + } + return kNoError; +} + +string ProfileDataChecker::ValidateProfile() { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + struct stat statbuf; + if (fstat(fd.get(), &statbuf) != 0) + return "fstat error"; + if (statbuf.st_size != static_cast<ssize_t>(statbuf.st_size)) + return "file impossibly large"; + ssize_t filesize = statbuf.st_size; + + scoped_array<char> filedata(new char[filesize]); + if (ReadPersistent(fd.get(), filedata.get(), filesize) != filesize) + return "read of whole file failed"; + + // Must have enough data for the header and the trailer. 
+ if (filesize < (5 + 3) * sizeof(ProfileDataSlot)) + return "not enough data in profile for header + trailer"; + + // Check the header + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[0] != 0) + return "error in header: non-zero count"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[1] != 3) + return "error in header: num_slots != 3"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[2] != 0) + return "error in header: non-zero format version"; + // Period (slot 3) can have any value. + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[4] != 0) + return "error in header: non-zero padding value"; + ssize_t cur_offset = 5 * sizeof(ProfileDataSlot); + + // While there are samples, skip them. Each sample consists of + // at least three slots. + bool seen_trailer = false; + while (!seen_trailer) { + if (cur_offset > filesize - 3 * sizeof(ProfileDataSlot)) + return "truncated sample header"; + ProfileDataSlot* sample = + reinterpret_cast<ProfileDataSlot*>(filedata.get() + cur_offset); + ProfileDataSlot slots_this_sample = 2 + sample[1]; + ssize_t size_this_sample = slots_this_sample * sizeof(ProfileDataSlot); + if (cur_offset > filesize - size_this_sample) + return "truncated sample"; + + if (sample[0] == 0 && sample[1] == 1 && sample[2] == 0) { + seen_trailer = true; + } else { + if (sample[0] < 1) + return "error in sample: sample count < 1"; + if (sample[1] < 1) + return "error in sample: num_pcs < 1"; + for (int i = 2; i < slots_this_sample; i++) { + if (sample[i] == 0) + return "error in sample: NULL PC"; + } + } + cur_offset += size_this_sample; + } + + // There must be at least one line in the (text) list of mapped objects, + // and it must be terminated by a newline. Note, the use of newline + // here and below Might not be reasonable on non-UNIX systems. 
+ if (cur_offset >= filesize) + return "no list of mapped objects"; + if (filedata[filesize - 1] != '\n') + return "profile did not end with a complete line"; + + while (cur_offset < filesize) { + char* line_start = filedata.get() + cur_offset; + + // Find the end of the line, and replace it with a NUL for easier + // scanning. + char* line_end = strchr(line_start, '\n'); + *line_end = '\0'; + + // Advance past any leading space. It's allowed in some lines, + // but not in others. + bool has_leading_space = false; + char* line_cur = line_start; + while (*line_cur == ' ') { + has_leading_space = true; + line_cur++; + } + + bool found_match = false; + + // Check for build lines. + if (!found_match) { + found_match = (strncmp(line_cur, "build=", 6) == 0); + // Anything may follow "build=", and leading space is allowed. + } + + // A line from ProcMapsIterator::FormatLine, of the form: + // + // 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + // + // Leading space is not allowed. The filename may be omitted or + // may consist of multiple words, so we scan only up to the + // space before the filename. + if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x %*c%*c%*c%*c %*x %*x:%*x %*d %n", + &chars_scanned); + found_match = (chars_scanned > 0 && !has_leading_space); + } + + // A line from DumpAddressMap, of the form: + // + // 40000000-40015000: /lib/ld-2.3.2.so + // + // Leading space is allowed. The filename may be omitted or may + // consist of multiple words, so we scan only up to the space + // before the filename. 
+ if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x: %n", &chars_scanned); + found_match = (chars_scanned > 0); + } + + if (!found_match) + return "unrecognized line in text section"; + + cur_offset += (line_end - line_start) + 1; + } + + return kNoError; +} + class ProfileDataTest { protected: void ExpectStopped() { @@ -162,6 +357,7 @@ class ProfileDataTest { void CollectOne(); void CollectTwoMatching(); void CollectTwoFlush(); + void StartResetRestart(); public: #define RUN(test) do { \ @@ -178,6 +374,7 @@ class ProfileDataTest { RUN(CollectOne); RUN(CollectTwoMatching); RUN(CollectTwoFlush); + RUN(StartResetRestart); return 0; } }; @@ -226,7 +423,8 @@ TEST_F(ProfileDataTest, StartStopEmpty) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } // Start and Stop with no options, collecting no samples. Verify @@ -246,8 +444,10 @@ TEST_F(ProfileDataTest, StartStopNoOptionsEmpty) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.CheckWithSkips(slots, arraysize(slots), - slots_to_skip, arraysize(slots_to_skip)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.CheckWithSkips(slots, arraysize(slots), + slots_to_skip, + arraysize(slots_to_skip))); } // Start after already started. Should return false and not impact @@ -275,7 +475,8 @@ TEST_F(ProfileDataTest, StartWhenStarted) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } // Like StartStopEmpty, but uses a different file name and frequency. 
@@ -293,7 +494,8 @@ TEST_F(ProfileDataTest, StartStopEmpty2) { ExpectRunningSamples(0); collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectOne) { @@ -316,7 +518,8 @@ TEST_F(ProfileDataTest, CollectOne) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectTwoMatching) { @@ -341,7 +544,8 @@ TEST_F(ProfileDataTest, CollectTwoMatching) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } TEST_F(ProfileDataTest, CollectTwoFlush) { @@ -370,7 +574,41 @@ TEST_F(ProfileDataTest, CollectTwoFlush) { collector_.Stop(); ExpectStopped(); - checker_.Check(slots, arraysize(slots)); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +// Start then reset, verify that the result is *not* a valid profile. +// Then start again and make sure the result is OK. +TEST_F(ProfileDataTest, StartResetRestart) { + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(1); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Reset(); + ExpectStopped(); + // We expect the resulting file to be empty. This is a minimal test + // of ValidateProfile. + EXPECT_NE(kNoError, checker_.ValidateProfile()); + + struct stat statbuf; + EXPECT_EQ(0, stat(checker_.filename().c_str(), &statbuf)); + EXPECT_EQ(0, statbuf.st_size); + + const int frequency = 2; // Different frequency than used above. 
+ ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); } } // namespace diff --git a/src/tests/sampling_test.cc b/src/tests/sampling_test.cc index 6845574..b75e70e 100644 --- a/src/tests/sampling_test.cc +++ b/src/tests/sampling_test.cc @@ -64,7 +64,7 @@ int main(int argc, char** argv) { fprintf(stderr, "USAGE: %s <base of output files>\n", argv[0]); exit(1); } - for (int i = 0; i < 9000; i++) { + for (int i = 0; i < 8000; i++) { AllocateAllocate(); } diff --git a/src/tests/sampling_test.sh b/src/tests/sampling_test.sh index 9e45f67..149d27b 100755 --- a/src/tests/sampling_test.sh +++ b/src/tests/sampling_test.sh @@ -52,7 +52,15 @@ OUTDIR="/tmp/sampling_test_dir" # libtool is annoying, and puts the actual executable in a different # directory, replacing the seeming-executable with a shell script. # We use the error output of sampling_test to indicate its real location -SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '{print $2; exit;}'` +SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '/USAGE/ {print $2; exit;}'` + +# A kludge for cygwin. Unfortunately, 'test -f' says that 'foo' exists +# even when it doesn't, and only foo.exe exists. Other unix utilities +# (like nm) need you to say 'foo.exe'. We use one such utility, cat, to +# see what the *real* binary name is. +if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then + SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe +fi die() { echo "FAILED" @@ -66,20 +74,20 @@ rm -rf "$OUTDIR" || die "Unable to delete $OUTDIR" mkdir "$OUTDIR" || die "Unable to create $OUTDIR" # This puts the output into out.heap and out.growth. 
It allocates -# 9*10^7 bytes of memory, which is 85M. Because we sample, the +# 8*10^7 bytes of memory, which is 76M. Because we sample, the # estimate may be a bit high or a bit low: we accept anything from -# 70M to 99M. +# 50M to 99M. "$SAMPLING_TEST" "$OUTDIR/out" echo -n "Testing heap output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ - | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"` echo "OK" echo -n "Testing growth output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ - | grep '^ *[7-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"` echo "OK" diff --git a/src/tests/testutil.cc b/src/tests/testutil.cc index 6643443..f2b8592 100644 --- a/src/tests/testutil.cc +++ b/src/tests/testutil.cc @@ -99,7 +99,9 @@ extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int) { #elif defined(_WIN32) +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif #include <windows.h> extern "C" { diff --git a/src/thread_cache.cc b/src/thread_cache.cc index d2b0c4f..a1fdf0f 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -489,16 +489,23 @@ void ThreadCache::RecomputePerThreadCacheSize() { double ratio = space / max<double>(1, per_thread_cache_size_); size_t claimed = 0; for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - // Don't circumvent the slow-start growth of max_size_ by increasing - // the total cache size. - if (!use_dynamic_cache_size_ || ratio < 1.0) { - h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + if (use_dynamic_cache_size_) { + // Don't circumvent the slow-start growth of max_size_ by increasing the + // total cache size. 
+ if (ratio < 1.0) { + h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + } + } else { + // Don't try to be clever and multiply by 'ratio' because rounding + // errors will eventually cause long-lived threads to have zero + // max_size_. + h->max_size_ = space; } claimed += h->max_size_; } unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; per_thread_cache_size_ = space; - //MESSAGE("Threads %d => cache size %8d\n", n, int(space)); + // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space)); } void ThreadCache::Print(TCMalloc_Printer* out) const { diff --git a/src/windows/addr2line-pdb.c b/src/windows/addr2line-pdb.c index 5384731..97b614b 100644 --- a/src/windows/addr2line-pdb.c +++ b/src/windows/addr2line-pdb.c @@ -45,7 +45,6 @@ #include <windows.h> #include <dbghelp.h> - #define SEARCH_CAP (1024*1024) #define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" diff --git a/src/windows/config.h b/src/windows/config.h index 6be561e..2811296 100644 --- a/src/windows/config.h +++ b/src/windows/config.h @@ -12,6 +12,13 @@ #ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ #define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. + * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +#undef WIN32_OVERRIDE_ALLOCATORS + /* the location of <hash_map> */ #define HASH_MAP_H <hash_map> @@ -21,6 +28,9 @@ /* the location of <hash_set> */ #define HASH_SET_H <hash_set> +/* Define to 1 if your libc has a snprintf implementation */ +#undef HAVE_SNPRINTF + /* Define to 1 if compiler supports __builtin_stack_pointer */ #undef HAVE_BUILTIN_STACK_POINTER @@ -162,9 +172,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <windows.h> header file. 
*/ -#define HAVE_WINDOWS_H 1 - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ @@ -177,6 +184,9 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + /* Name of package */ #undef PACKAGE diff --git a/src/windows/mingw.h b/src/windows/mingw.h index 1745723..e69b5da 100644 --- a/src/windows/mingw.h +++ b/src/windows/mingw.h @@ -47,6 +47,8 @@ #include "windows/port.h" +#define HAVE_SNPRINTF 1 + #endif /* __MINGW32__ */ #endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */ diff --git a/src/windows/nm-pdb.c b/src/windows/nm-pdb.c index ec0ddf9..726d345 100644 --- a/src/windows/nm-pdb.c +++ b/src/windows/nm-pdb.c @@ -41,6 +41,7 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> // for _strdup #include <windows.h> #include <dbghelp.h> diff --git a/src/windows/override_functions.cc b/src/windows/override_functions.cc new file mode 100644 index 0000000..2ad6bbc --- /dev/null +++ b/src/windows/override_functions.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Mike Belshe +// +// To link tcmalloc into a EXE or DLL statically without using the patching +// facility, we can take a stock libcmt and remove all the allocator functions. +// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few +// functions are missing. This file contains the additional overrides which +// are required in the VS2005 libcmt in order to link the modified libcmt. +// +// See also +// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + +#include "config.h" + +#ifndef _WIN32 +# error You should only be including this file in a windows environment! 
+#endif + +#ifndef WIN32_OVERRIDE_ALLOCATORS +# error This file is intended for use when overriding allocators +#endif + +#include "tcmalloc.cc" + +extern "C" void* _recalloc(void* p, size_t n, size_t size) { + void* result = realloc(p, n * size); + memset(result, 0, n * size); + return result; +} + +extern "C" void* _calloc_impl(size_t n, size_t size) { + return calloc(n, size); +} + +extern "C" size_t _msize(void* p) { + return MallocExtension::instance()->GetAllocatedSize(p); +} + +extern "C" intptr_t _get_heap_handle() { + return 0; +} + +// The CRT heap initialization stub. +extern "C" int _heap_init() { + // We intentionally leak this object. It lasts for the process + // lifetime. Trying to teardown at _heap_term() is so late that + // you can't do anything useful anyway. + new TCMallocGuard(); + return 1; +} + +// The CRT heap cleanup stub. +extern "C" void _heap_term() { +} + +#ifndef NDEBUG +#undef malloc +#undef free +#undef calloc +int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) { + return 0; +} + +int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) { + return 0; +} + +int _CrtSetReportMode(int, int) { + return 0; +} + +extern "C" void* _malloc_dbg(size_t size, int , const char*, int) { + return malloc(size); +} + +extern "C" void _free_dbg(void* ptr, int) { + free(ptr); +} + +extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) { + return calloc(n, size); +} +#endif // NDEBUG + +// We set this to 1 because part of the CRT uses a check of _crtheap != 0 +// to test whether the CRT has been initialized. Once we've ripped out +// the allocators from libcmt, we need to provide this definition so that +// the rest of the CRT is still usable. 
+extern "C" void* _crtheap = reinterpret_cast<void*>(1); diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc index d73c064..ff5acad 100644 --- a/src/windows/patch_functions.cc +++ b/src/windows/patch_functions.cc @@ -28,7 +28,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // --- -// Author: Craig Silversteion +// Author: Craig Silverstein // // The main purpose of this file is to patch the libc allocation // routines (malloc and friends, but also _msize and other @@ -67,6 +67,11 @@ #endif #include "config.h" + +#ifdef WIN32_OVERRIDE_ALLOCATORS +#error This file is intended for patching allocators - use override_functions.cc instead. +#endif + #include <windows.h> #include <malloc.h> // for _msize and _expand #include <tlhelp32.h> // for CreateToolhelp32Snapshot() @@ -476,7 +481,7 @@ void LibcInfoWithPatchFunctions<T>::Unpatch() { } void WindowsInfo::Patch() { - HMODULE hkernel32 = ::GetModuleHandle("kernel32"); + HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); CHECK_NE(hkernel32, NULL); // Unlike for libc, we know these exist in our module, so we can get @@ -693,8 +698,10 @@ void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( (void (*)(void*))origstub_fn_[kFree]); return NULL; } - return do_realloc_with_callback(old_ptr, new_size, ((void* (*)(void*, size_t)) - origstub_fn_[kRealloc])); + return do_realloc_with_callback( + old_ptr, new_size, + (void (*)(void*))origstub_fn_[kFree], + (size_t (*)(void*))origstub_fn_[k_Msize]); } template<int T> @@ -772,28 +779,7 @@ void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( template<int T> size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { - // Get the size of the old entry - const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - Span *span = NULL; - size_t old_size; - if (cl == 0) { - span = Static::pageheap()->GetDescriptor(p); - if (!span) { - // 
This can happen on windows because some constructors may - // construct things before tcmalloc hooks _msize(). - return ((size_t (*)(void*))origstub_fn_[k_Msize])(ptr); - } - cl = span->sizeclass; - Static::pageheap()->CacheSizeClass(p, cl); - } - if (cl != 0) { - old_size = Static::sizemap()->ByteSizeForClass(cl); - } else { - ASSERT(span != NULL); - old_size = span->length << kPageShift; - } - return old_size; + return GetSizeWithCallback(ptr, (size_t (*)(void*))origstub_fn_[k_Msize]); } // We need to define this because internal windows functions like to diff --git a/src/windows/port.cc b/src/windows/port.cc index 7cb3a57..0f1a700 100644 --- a/src/windows/port.cc +++ b/src/windows/port.cc @@ -55,8 +55,7 @@ int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { return _vsnprintf(str, size-1, format, ap); } -// mingw defines its own snprintf, though msvc does not -#ifndef __MINGW32__ +#ifndef HAVE_SNPRINTF int snprintf(char *str, size_t size, const char *format, ...) { va_list ap; va_start(ap, format); diff --git a/src/windows/port.h b/src/windows/port.h index e5b9b5f..50866ec 100644 --- a/src/windows/port.h +++ b/src/windows/port.h @@ -49,7 +49,9 @@ #ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif #include <windows.h> #include <io.h> /* because we so often use open/close/etc */ #include <stdarg.h> /* for va_list */ |