summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lassi Tuura <lat@cern.ch> 2011-04-17 20:33:09 -0700
committer Arun Sharma <asharma@fb.com> 2011-04-17 20:34:38 -0700
commit ae5c1f2adf4da04235d87d024d4d942c01b2b447 (patch)
tree a608b026890ccd2823670cb4cc4e42e3e7fd07f5
parent e2962af9d31266761700b431da894421c0d757ec (diff)
download libunwind-ae5c1f2adf4da04235d87d024d4d942c01b2b447.tar.gz
Performance optimisations for fast trace.
Insert static branch prediction predicates in useful places and avoid unnecessary code in the hottest paths. Bypass unnecessary indirect calls, in particular to access_mem(), when known to be safe.
-rw-r--r-- include/libunwind_i.h 8
-rw-r--r-- include/tdep-x86_64/libunwind_i.h 8
-rw-r--r-- src/mi/backtrace.c 6
-rw-r--r-- src/x86_64/Ginit.c 15
-rw-r--r-- src/x86_64/Ginit_local.c 2
-rw-r--r-- src/x86_64/Gos-freebsd.c 1
-rw-r--r-- src/x86_64/Gos-linux.c 1
-rw-r--r-- src/x86_64/Gtrace.c 49
-rw-r--r-- src/x86_64/init.h 48
-rw-r--r-- src/x86_64/unwind_i.h 12
10 files changed, 88 insertions, 62 deletions
diff --git a/include/libunwind_i.h b/include/libunwind_i.h
index 9b91a12f..d7353d18 100644
--- a/include/libunwind_i.h
+++ b/include/libunwind_i.h
@@ -301,6 +301,14 @@ struct elf_image
size_t size; /* (file-) size of the image */
};
+/* Provide a place holder for architecture to override for fast access
+ to memory when known not to need to validate and know the access
+ will be local to the process. A suitable override will improve
+ unw_tdep_trace() performance in particular. */
+#define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
+ do { (ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); } \
+ while (0)
+
#include "tdep/libunwind_i.h"
#ifndef tdep_get_func_addr
diff --git a/include/tdep-x86_64/libunwind_i.h b/include/tdep-x86_64/libunwind_i.h
index 22697b13..c4c69600 100644
--- a/include/tdep-x86_64/libunwind_i.h
+++ b/include/tdep-x86_64/libunwind_i.h
@@ -110,10 +110,10 @@ dwarf_get_uc(const struct dwarf_cursor *cursor)
# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r) })
# define DWARF_IS_REG_LOC(l) 0
# define DWARF_REG_LOC(c,r) (DWARF_LOC((unw_word_t) \
- tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
+ x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0)
# define DWARF_FPREG_LOC(c,r) (DWARF_LOC((unw_word_t) \
- tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
+ x86_64_r_uc_addr(dwarf_get_uc(c), (r)), 0))
#else /* !UNW_LOCAL_ONLY */
# define DWARF_LOC_TYPE_FP (1 << 0)
@@ -184,7 +184,6 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
/* Platforms that support UNW_INFO_FORMAT_TABLE need to define
tdep_search_unwind_table. */
#define tdep_search_unwind_table dwarf_search_unwind_table
-#define tdep_uc_addr UNW_ARCH_OBJ(uc_addr)
#define tdep_get_elf_image UNW_ARCH_OBJ(get_elf_image)
#define tdep_access_reg UNW_OBJ(access_reg)
#define tdep_access_fpreg UNW_OBJ(access_fpreg)
@@ -199,6 +198,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
#endif
#define tdep_stash_frame UNW_OBJ(stash_frame)
#define tdep_trace UNW_OBJ(tdep_trace)
+#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
#ifdef UNW_LOCAL_ONLY
# define tdep_find_proc_info(c,ip,n) \
@@ -226,7 +226,7 @@ extern void tdep_init_mem_validate (void);
extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
unw_dyn_info_t *di, unw_proc_info_t *pi,
int need_unwind_info, void *arg);
-extern void *tdep_uc_addr (ucontext_t *uc, int reg);
+extern void *x86_64_r_uc_addr (ucontext_t *uc, int reg);
extern int tdep_get_elf_image (struct elf_image *ei, pid_t pid, unw_word_t ip,
unsigned long *segbase, unsigned long *mapoff,
char *path, size_t pathlen);
diff --git a/src/mi/backtrace.c b/src/mi/backtrace.c
index 42e1f5d3..bd748aac 100644
--- a/src/mi/backtrace.c
+++ b/src/mi/backtrace.c
@@ -39,7 +39,7 @@ slow_backtrace (void **buffer, int size, unw_context_t *uc)
unw_word_t ip;
int n = 0;
- if (unw_init_local (&cursor, uc) < 0)
+ if (unlikely (unw_init_local (&cursor, uc) < 0))
return 0;
while (unw_step (&cursor) > 0)
@@ -63,10 +63,10 @@ unw_backtrace (void **buffer, int size)
tdep_getcontext_trace (&uc);
- if (unw_init_local (&cursor, &uc) < 0)
+ if (unlikely (unw_init_local (&cursor, &uc) < 0))
return 0;
- if (tdep_trace (&cursor, buffer, &n) < 0)
+ if (unlikely (tdep_trace (&cursor, buffer, &n) < 0))
{
unw_getcontext (&uc);
return slow_backtrace (buffer, size, &uc);
diff --git a/src/x86_64/Ginit.c b/src/x86_64/Ginit.c
index f49e4bad..ee62d02e 100644
--- a/src/x86_64/Ginit.c
+++ b/src/x86_64/Ginit.c
@@ -47,16 +47,6 @@ static struct unw_addr_space local_addr_space;
PROTECTED unw_addr_space_t unw_local_addr_space = &local_addr_space;
-# ifdef UNW_LOCAL_ONLY
-
-HIDDEN void *
-tdep_uc_addr (ucontext_t *uc, int reg)
-{
- return x86_64_r_uc_addr (uc, reg);
-}
-
-# endif /* UNW_LOCAL_ONLY */
-
HIDDEN unw_dyn_info_list_t _U_dyn_info_list;
/* XXX fix me: there is currently no way to locate the dyn-info list
@@ -168,7 +158,7 @@ static int
access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
void *arg)
{
- if (write)
+ if (unlikely (write))
{
Debug (16, "mem[%016lx] <- %lx\n", addr, *val);
*(unw_word_t *) addr = *val;
@@ -177,7 +167,8 @@ access_mem (unw_addr_space_t as, unw_word_t addr, unw_word_t *val, int write,
{
/* validate address */
const struct cursor *c = (const struct cursor *)arg;
- if (c && c->validate && validate_mem(addr))
+ if (likely (c != 0) && unlikely (c->validate)
+ && unlikely (validate_mem (addr)))
return -1;
*val = *(unw_word_t *) addr;
Debug (16, "mem[%016lx] -> %lx\n", addr, *val);
diff --git a/src/x86_64/Ginit_local.c b/src/x86_64/Ginit_local.c
index 70bef3e1..54b4fcdb 100644
--- a/src/x86_64/Ginit_local.c
+++ b/src/x86_64/Ginit_local.c
@@ -43,7 +43,7 @@ unw_init_local (unw_cursor_t *cursor, ucontext_t *uc)
{
struct cursor *c = (struct cursor *) cursor;
- if (tdep_needs_initialization)
+ if (unlikely (tdep_needs_initialization))
tdep_init ();
Debug (1, "(cursor=%p)\n", c);
diff --git a/src/x86_64/Gos-freebsd.c b/src/x86_64/Gos-freebsd.c
index 50ee60bd..3ef99261 100644
--- a/src/x86_64/Gos-freebsd.c
+++ b/src/x86_64/Gos-freebsd.c
@@ -154,6 +154,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
HIDDEN void *
x86_64_r_uc_addr (ucontext_t *uc, int reg)
{
+ /* NOTE: common_init() in init.h inlines these for fast path access. */
void *addr;
switch (reg)
diff --git a/src/x86_64/Gos-linux.c b/src/x86_64/Gos-linux.c
index c0278881..a315ea1e 100644
--- a/src/x86_64/Gos-linux.c
+++ b/src/x86_64/Gos-linux.c
@@ -106,6 +106,7 @@ unw_handle_signal_frame (unw_cursor_t *cursor)
HIDDEN void *
x86_64_r_uc_addr (ucontext_t *uc, int reg)
{
+ /* NOTE: common_init() in init.h inlines these for fast path access. */
void *addr;
switch (reg)
diff --git a/src/x86_64/Gtrace.c b/src/x86_64/Gtrace.c
index 6935d00b..5b23f7c0 100644
--- a/src/x86_64/Gtrace.c
+++ b/src/x86_64/Gtrace.c
@@ -92,7 +92,7 @@ trace_cache_buckets (void)
unw_tdep_frame_t *frames = mempool_alloc(&trace_frame_pool);
size_t i;
- if (likely (frames != 0))
+ if (likely(frames != 0))
for (i = 0; i < (1u << HASH_LOW_BITS); ++i)
frames[i] = empty_frame;
@@ -142,7 +142,7 @@ trace_cache_expand (unw_trace_cache_t *cache)
old_size = (1u << cache->log_frame_vecs);
new_size = cache->log_frame_vecs + 2;
for (i = old_size; i < (1u << new_size); ++i)
- if (unlikely (! (cache->frames[i] = trace_cache_buckets())))
+ if (unlikely(! (cache->frames[i] = trace_cache_buckets())))
{
Debug(5, "failed to expand cache to 2^%lu hash bucket sets\n", new_size);
for (j = old_size; j < i; ++j)
@@ -237,10 +237,10 @@ trace_init_addr (unw_tdep_frame_t *f,
d->loc[UNW_X86_64_RSP] = DWARF_REG_LOC (d, UNW_X86_64_RSP);
c->frame_info = *f;
- if (dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0
- && dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0
- && dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0
- && (ret = unw_step (cursor)) >= 0)
+ if (likely(dwarf_put (d, d->loc[UNW_X86_64_RIP], rip) >= 0)
+ && likely(dwarf_put (d, d->loc[UNW_X86_64_RBP], rbp) >= 0)
+ && likely(dwarf_put (d, d->loc[UNW_X86_64_RSP], rsp) >= 0)
+ && likely((ret = unw_step (cursor)) >= 0))
*f = c->frame_info;
/* If unw_step() stopped voluntarily, remember that, even if it
@@ -290,14 +290,14 @@ trace_lookup (unw_cursor_t *cursor,
addr = frame->virtual_address;
/* Return if we found the address. */
- if (addr == rip)
+ if (likely(addr == rip))
{
Debug (4, "found address after %ld steps\n", i);
return frame;
}
/* If slot is empty, reuse it. */
- if (! addr)
+ if (likely(! addr))
break;
/* Linear probe to next slot candidate, step = 1. */
@@ -310,9 +310,9 @@ trace_lookup (unw_cursor_t *cursor,
it's free or collides. Note that hash expansion drops previous
contents; further lookups will refill the hash. */
Debug (4, "updating slot %lu after %ld steps, replacing 0x%lx\n", slot, i, addr);
- if (unlikely (addr || cache->used >= cache_size / 2))
+ if (unlikely(addr || cache->used >= cache_size / 2))
{
- if (unlikely (trace_cache_expand (cache) < 0))
+ if (unlikely(trace_cache_expand (cache) < 0))
return 0;
cache_size = 1u << (HASH_LOW_BITS + cache->log_frame_vecs);
@@ -404,7 +404,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
int ret;
/* Check input parametres. */
- if (! cursor || ! buffer || ! size || (maxdepth = *size) <= 0)
+ if (unlikely(! cursor || ! buffer || ! size || (maxdepth = *size) <= 0))
return -UNW_EINVAL;
Debug (1, "begin ip 0x%lx cfa 0x%lx\n", d->ip, d->cfa);
@@ -415,7 +415,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
/* Determine initial register values. */
rip = d->ip;
rsp = cfa = d->cfa;
- if ((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0)
+ if (unlikely((ret = dwarf_get (d, d->loc[UNW_X86_64_RBP], &rbp)) < 0))
{
Debug (1, "returning %d, rbp value not found\n", ret);
*size = 0;
@@ -424,7 +424,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
}
/* Get frame cache. */
- if (! (cache = trace_cache_get()))
+ if (unlikely(! (cache = trace_cache_get())))
{
Debug (1, "returning %d, cannot get trace cache\n", -UNW_ENOMEM);
*size = 0;
@@ -450,7 +450,7 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
unw_tdep_frame_t *f = trace_lookup (cursor, cache, cfa, rip, rbp, rsp);
/* If we don't have information for this frame, give up. */
- if (! f)
+ if (unlikely(! f))
{
ret = -UNW_ENOINFO;
break;
@@ -481,9 +481,9 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
case UNW_X86_64_FRAME_STANDARD:
/* Advance standard traceable frame. */
cfa = (f->cfa_reg_rsp ? rsp : rbp) + f->cfa_reg_offset;
- ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa - 8), &rip);
- if (ret >= 0 && f->rbp_cfa_offset != -1)
- ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
+ ACCESS_MEM_FAST(ret, c->validate, d, cfa - 8, rip);
+ if (likely(ret >= 0) && likely(f->rbp_cfa_offset != -1))
+ ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
/* Don't bother reading RSP from DWARF, CFA becomes new RSP. */
rsp = cfa;
@@ -497,13 +497,12 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
registers (ucontext) among other things. We know the info
is stored at some unknown constant offset off inner frame's
CFA. We determine the actual offset from DWARF unwind info. */
- d->use_prev_instr = 0;
cfa = cfa + f->cfa_reg_offset;
- ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset + dRIP), &rip);
- if (ret >= 0)
- ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rbp_cfa_offset), &rbp);
- if (ret >= 0)
- ret = dwarf_get (d, DWARF_MEM_LOC (d, cfa + f->rsp_cfa_offset), &rsp);
+ ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset + dRIP, rip);
+ if (likely(ret >= 0))
+ ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rbp_cfa_offset, rbp);
+ if (likely(ret >= 0))
+ ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->rsp_cfa_offset, rsp);
/* Resume stack at signal restoration point. The stack is not
necessarily continuous here, especially with sigaltstack(). */
@@ -524,8 +523,8 @@ tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
Debug (4, "new cfa 0x%lx rip 0x%lx rsp 0x%lx rbp 0x%lx\n",
cfa, rip, rsp, rbp);
- /* If we failed on ended up somewhere bogus, stop. */
- if (ret < 0 || rip < 0x4000)
+ /* If we failed or ended up somewhere bogus, stop. */
+ if (unlikely(ret < 0 || rip < 0x4000))
break;
/* Record this address in stack trace. We skipped the first address. */
diff --git a/src/x86_64/init.h b/src/x86_64/init.h
index f04ecda3..e80e5533 100644
--- a/src/x86_64/init.h
+++ b/src/x86_64/init.h
@@ -27,28 +27,42 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "unwind_i.h"
+/* Avoid a trip to x86_64_r_uc_addr() for purely local initialisation. */
+#if defined UNW_LOCAL_ONLY && defined __linux
+# define REG_INIT_LOC(c, rlc, ruc) \
+ DWARF_LOC ((unw_word_t) &c->uc->uc_mcontext.gregs[REG_ ## ruc], 0)
+
+#elif defined UNW_LOCAL_ONLY && defined __FreeBSD__
+# define REG_INIT_LOC(c, rlc, ruc) \
+ DWARF_LOC ((unw_word_t) &c->uc->uc_mcontext.mc_ ## rlc, 0)
+
+#else
+# define REG_INIT_LOC(c, rlc, ruc) \
+ DWARF_REG_LOC (&c->dwarf, UNW_X86_64_ ## ruc)
+#endif
+
static inline int
common_init (struct cursor *c, unsigned use_prev_instr)
{
int ret;
- c->dwarf.loc[RAX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RAX);
- c->dwarf.loc[RDX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDX);
- c->dwarf.loc[RCX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RCX);
- c->dwarf.loc[RBX] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBX);
- c->dwarf.loc[RSI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSI);
- c->dwarf.loc[RDI] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RDI);
- c->dwarf.loc[RBP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RBP);
- c->dwarf.loc[RSP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RSP);
- c->dwarf.loc[R8] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R8);
- c->dwarf.loc[R9] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R9);
- c->dwarf.loc[R10] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R10);
- c->dwarf.loc[R11] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R11);
- c->dwarf.loc[R12] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R12);
- c->dwarf.loc[R13] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R13);
- c->dwarf.loc[R14] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R14);
- c->dwarf.loc[R15] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_R15);
- c->dwarf.loc[RIP] = DWARF_REG_LOC (&c->dwarf, UNW_X86_64_RIP);
+ c->dwarf.loc[RAX] = REG_INIT_LOC(c, rax, RAX);
+ c->dwarf.loc[RDX] = REG_INIT_LOC(c, rdx, RDX);
+ c->dwarf.loc[RCX] = REG_INIT_LOC(c, rcx, RCX);
+ c->dwarf.loc[RBX] = REG_INIT_LOC(c, rbx, RBX);
+ c->dwarf.loc[RSI] = REG_INIT_LOC(c, rsi, RSI);
+ c->dwarf.loc[RDI] = REG_INIT_LOC(c, rdi, RDI);
+ c->dwarf.loc[RBP] = REG_INIT_LOC(c, rbp, RBP);
+ c->dwarf.loc[RSP] = REG_INIT_LOC(c, rsp, RSP);
+ c->dwarf.loc[R8] = REG_INIT_LOC(c, r8, R8);
+ c->dwarf.loc[R9] = REG_INIT_LOC(c, r9, R9);
+ c->dwarf.loc[R10] = REG_INIT_LOC(c, r10, R10);
+ c->dwarf.loc[R11] = REG_INIT_LOC(c, r11, R11);
+ c->dwarf.loc[R12] = REG_INIT_LOC(c, r12, R12);
+ c->dwarf.loc[R13] = REG_INIT_LOC(c, r13, R13);
+ c->dwarf.loc[R14] = REG_INIT_LOC(c, r14, R14);
+ c->dwarf.loc[R15] = REG_INIT_LOC(c, r15, R15);
+ c->dwarf.loc[RIP] = REG_INIT_LOC(c, rip, RIP);
ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip);
if (ret < 0)
diff --git a/src/x86_64/unwind_i.h b/src/x86_64/unwind_i.h
index 699a6b38..1e55a766 100644
--- a/src/x86_64/unwind_i.h
+++ b/src/x86_64/unwind_i.h
@@ -65,6 +65,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define x86_64_r_uc_addr UNW_OBJ(r_uc_addr)
#define x86_64_sigreturn UNW_OBJ(sigreturn)
+/* By-pass calls to access_mem() when known to be safe. */
+#ifdef UNW_LOCAL_ONLY
+# undef ACCESS_MEM_FAST
+# define ACCESS_MEM_FAST(ret,validate,cur,addr,to) \
+ do { \
+ if (unlikely(validate)) \
+ (ret) = dwarf_get ((cur), DWARF_MEM_LOC ((cur), (addr)), &(to)); \
+ else \
+ (ret) = 0, (to) = *(unw_word_t *)(addr); \
+ } while (0)
+#endif
+
extern void x86_64_local_addr_space_init (void);
extern int x86_64_local_resume (unw_addr_space_t as, unw_cursor_t *cursor,
void *arg);