summaryrefslogtreecommitdiff
path: root/src/stacktrace_x86-inl.h
diff options
context:
space:
mode:
authorcsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-11-29 23:39:24 +0000
committercsilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50>2007-11-29 23:39:24 +0000
commit11b02f7aebd05cf39f6f93bdd48786909f99f34e (patch)
tree660c613e74cffd643ee4a7d0f0cb170057abbf7d /src/stacktrace_x86-inl.h
parent49b74b9508797f8aafe6b86e62e7efc4ec200e48 (diff)
downloadgperftools-11b02f7aebd05cf39f6f93bdd48786909f99f34e.tar.gz
Thu Nov 29 07:59:43 2007 Google Inc. <opensource@google.com>
* google-perftools: version 0.94 release * PORTING: MinGW/Msys support -- runs same code as MSVC does (csilvers) * PORTING: Add NumCPUs support for Mac OS X (csilvers) * Work around a sscanf bug in glibc(?) (waldemar) * Fix Windows MSVC bug triggered by thread deletion (csilvers) * Fix bug that triggers in MSVC /O2: missing volatile (gpike) * March-of-time support: quiet warnings/errors for gcc 4.2, OS X 10.5 * Modify pprof so it works without nm: useful for windows (csilvers) * pprof: Support filtering for CPU profiles (cgd) * Bugfix: have realloc report to hooks in all situations (maxim) * Speed improvement: replace slow memcpy with std::copy (soren) * Speed: better iterator efficiency in RecordRegionRemoval (soren) * Speed: minor speed improvements via better bitfield alignment (gpike) * Documentation: add documentation of binary profile output (cgd) git-svn-id: http://gperftools.googlecode.com/svn/trunk@40 6b5cf1ce-ec42-a296-1ba9-69fdba395a50
Diffstat (limited to 'src/stacktrace_x86-inl.h')
-rw-r--r--src/stacktrace_x86-inl.h110
1 files changed, 105 insertions, 5 deletions
diff --git a/src/stacktrace_x86-inl.h b/src/stacktrace_x86-inl.h
index 5dcbc7a..794aaf5 100644
--- a/src/stacktrace_x86-inl.h
+++ b/src/stacktrace_x86-inl.h
@@ -37,14 +37,29 @@
#include "google/stacktrace.h"
// Given a pointer to a stack frame, locate and return the calling
-// stackframe, or return NULL if no stackframe can be found. Perform
-// sanity checks to reduce the chance that a bad pointer is returned.
+// stackframe, or return NULL if no stackframe can be found. Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
static void **NextStackFrame(void **old_sp) {
void **new_sp = (void **) *old_sp;
// Check that the transition from frame pointer old_sp to frame
// pointer new_sp isn't clearly bogus
- if (new_sp <= old_sp) return NULL;
+ if (STRICT_UNWINDING) {
+ // With the stack growing downwards, older stack frame must be
+ // at a greater address than the current one.
+ if (new_sp <= old_sp) return NULL;
+ // Assume stack frames larger than 100,000 bytes are bogus.
+ if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+ } else {
+ // In the non-strict mode, allow discontiguous stack frames.
+ // (alternate-signal-stacks for example).
+ if (new_sp == old_sp) return NULL;
+ // And allow frames up to about 1MB.
+ if ((new_sp > old_sp)
+ && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
+ }
if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
#ifdef __i386__
// On 64-bit machines, the stack pointer can be very close to
@@ -52,10 +67,10 @@ static void **NextStackFrame(void **old_sp) {
// last two pages in the address space
if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
#endif
- if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
return new_sp;
}
+// If you change this function, also change GetStackFrames below.
int GetStackTrace(void** result, int max_depth, int skip_count) {
void **sp;
#ifdef __i386__
@@ -95,7 +110,92 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
} else {
result[n++] = *(sp+1);
}
- sp = NextStackFrame(sp);
+ // Use strict unwinding rules.
+ sp = NextStackFrame<true>(sp);
+ }
+ return n;
+}
+
+// If you change this function, also change GetStackTrace above:
+//
+// This GetStackFrames routine shares a lot of code with GetStackTrace
+// above. This code could have been refactored into a common routine,
+// and then both GetStackTrace/GetStackFrames could call that routine.
+// There are two problems with that:
+//
+// (1) The performance of the refactored-code suffers substantially - the
+// refactored routine needs to be able to record the stack trace when called
+// from GetStackTrace, and both the stack trace and stack frame sizes,
+// when called from GetStackFrames - this introduces enough new
+// conditionals that GetStackTrace performance can degrade by as much
+// as 50%.
+//
+// (2) Whether the refactored routine gets inlined into GetStackTrace and
+// GetStackFrames depends on the compiler, and we can't guarantee the
+// behavior either-way, even with "__attribute__ ((always_inline))"
+// or "__attribute__ ((noinline))". But we need this guarantee or the
+// frame counts may be off by one.
+//
+// Both (1) and (2) can be addressed without this code duplication, by
+// clever use of template functions, and by defining GetStackTrace and
+// GetStackFrames as macros that expand to these template functions.
+// However, this approach comes with its own set of problems - namely,
+// macros and preprocessor trouble - for example, if GetStackTrace
+// and/or GetStackFrames is ever defined as a member function in some
+// class, we are in trouble.
+int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+ void **sp;
+#ifdef __i386__
+ // Stack frame format:
+ // sp[0] pointer to previous frame
+ // sp[1] caller address
+ // sp[2] first argument
+ // ...
+ sp = (void **)&pcs - 2;
+#endif
+
+#ifdef __x86_64__
+ // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8
+ unsigned long rbp;
+ // Move the value of the register %rbp into the local variable rbp.
+ // We need 'volatile' to prevent this instruction from getting moved
+ // around during optimization to before the function prologue is done.
+ // An alternative way to achieve this
+ // would be (before this __asm__ instruction) to call Noop() defined as
+ // static void Noop() __attribute__ ((noinline)); // prevent inlining
+ // static void Noop() { asm(""); } // prevent optimizing-away
+ __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+ // Arguments are passed in registers on x86-64, so we can't just
+ // offset from &pcs
+ sp = (void **) rbp;
+#endif
+
+ int n = 0;
+ while (sp && n < max_depth) {
+ if (*(sp+1) == (void *)0) {
+ // In 64-bit code, we often see a frame that
+ // points to itself and has a return address of 0.
+ break;
+ }
+ // The GetStackFrames routine is called when we are in some
+ // informational context (the failure signal handler for example).
+ // Use the non-strict unwinding rules to produce a stack trace
+ // that is as complete as possible (even if it contains a few bogus
+ // entries in some rare cases).
+ void **next_sp = NextStackFrame<false>(sp);
+ if (skip_count > 0) {
+ skip_count--;
+ } else {
+ pcs[n] = *(sp+1);
+ if (next_sp > sp) {
+ sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+ } else {
+ // A frame-size of 0 is used to indicate unknown frame size.
+ sizes[n] = 0;
+ }
+ n++;
+ }
+ sp = next_sp;
}
return n;
}