diff options
author | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2007-11-29 23:39:24 +0000 |
---|---|---|
committer | csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> | 2007-11-29 23:39:24 +0000 |
commit | 11b02f7aebd05cf39f6f93bdd48786909f99f34e (patch) | |
tree | 660c613e74cffd643ee4a7d0f0cb170057abbf7d /src/stacktrace_x86-inl.h | |
parent | 49b74b9508797f8aafe6b86e62e7efc4ec200e48 (diff) | |
download | gperftools-11b02f7aebd05cf39f6f93bdd48786909f99f34e.tar.gz |
Thu Nov 29 07:59:43 2007 Google Inc. <opensource@google.com>
* google-perftools: version 0.94 release
* PORTING: MinGW/Msys support -- runs same code as MSVC does (csilvers)
* PORTING: Add NumCPUs support for Mac OS X (csilvers)
* Work around a sscanf bug in glibc(?) (waldemar)
* Fix Windows MSVC bug triggered by thread deletion (csilvers)
* Fix bug that triggers in MSVC /O2: missing volatile (gpike)
* March-of-time support: quiet warnings/errors for gcc 4.2, OS X 10.5
* Modify pprof so it works without nm: useful for windows (csilvers)
* pprof: Support filtering for CPU profiles (cgd)
* Bugfix: have realloc report to hooks in all situations (maxim)
* Speed improvement: replace slow memcpy with std::copy (soren)
* Speed: better iterator efficiency in RecordRegionRemoval (soren)
* Speed: minor speed improvements via better bitfield alignment (gpike)
* Documentation: add documentation of binary profile output (cgd)
git-svn-id: http://gperftools.googlecode.com/svn/trunk@40 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src/stacktrace_x86-inl.h')
-rw-r--r-- | src/stacktrace_x86-inl.h | 110 |
1 files changed, 105 insertions, 5 deletions
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h index 5dcbc7a..794aaf5 100644 --- a/src/stacktrace_x86-inl.h +++ b/src/stacktrace_x86-inl.h @@ -37,14 +37,29 @@ #include "google/stacktrace.h" // Given a pointer to a stack frame, locate and return the calling -// stackframe, or return NULL if no stackframe can be found. Perform -// sanity checks to reduce the chance that a bad pointer is returned. +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING> static void **NextStackFrame(void **old_sp) { void **new_sp = (void **) *old_sp; // Check that the transition from frame pointer old_sp to frame // pointer new_sp isn't clearly bogus - if (new_sp <= old_sp) return NULL; + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return NULL; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) return NULL; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL; + } if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL; #ifdef __i386__ // On 64-bit machines, the stack pointer can be very close to @@ -52,10 +67,10 @@ static void **NextStackFrame(void **old_sp) { // last two pages in the address space if ((uintptr_t)new_sp >= 0xffffe000) return NULL; #endif - if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; return new_sp; } +// If you change this function, also change GetStackFrames below. int GetStackTrace(void** result, int max_depth, int skip_count) { void **sp; #ifdef __i386__ @@ -95,7 +110,92 @@ int GetStackTrace(void** result, int max_depth, int skip_count) { } else { result[n++] = *(sp+1); } - sp = NextStackFrame(sp); + // Use strict unwinding rules. + sp = NextStackFrame<true>(sp); + } + return n; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + void **sp; +#ifdef __i386__ + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + sp = (void **)&pcs - 2; +#endif + +#ifdef __x86_64__ + // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8 + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. + // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &pcs + sp = (void **) rbp; +#endif + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == (void *)0) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_sp = NextStackFrame<false>(sp); + if (skip_count > 0) { + skip_count--; + } else { + pcs[n] = *(sp+1); + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + n++; + } + sp = next_sp; } return n; } |