-rw-r--r--  NEWS                                       |   14
-rwxr-xr-x  configure                                  |   20
-rw-r--r--  configure.ac                               |    2
-rw-r--r--  doc/cpuprofile.html                        |    7
-rw-r--r--  src/base/atomicops-internals-arm-v6plus.h  |    9
-rw-r--r--  src/base/linux_syscall_support.h           | 1334
-rw-r--r--  src/base/stl_allocator.h                   |    1
-rw-r--r--  src/base/sysinfo.cc                        |   12
-rw-r--r--  src/debugallocation.cc                     |   10
-rw-r--r--  src/google/malloc_extension.h              |   10
-rw-r--r--  src/google/malloc_extension_c.h            |    8
-rw-r--r--  src/heap-checker.cc                        |   41
-rw-r--r--  src/heap-profile-table.cc                  |  191
-rw-r--r--  src/heap-profile-table.h                   |   59
-rw-r--r--  src/heap-profiler.cc                       |   38
-rw-r--r--  src/malloc_extension.cc                    |   16
-rw-r--r--  src/memory_region_map.cc                   |    4
-rw-r--r--  src/memory_region_map.h                    |    9
-rw-r--r--  src/system-alloc.cc                        |   18
-rw-r--r--  src/tcmalloc.cc                            |   14
-rwxr-xr-x  src/tests/sampler_test.cc                  |    6
-rw-r--r--  src/thread_cache.cc                        |    2
-rw-r--r--  src/windows/ia32_opcode_map.cc             |   31
-rw-r--r--  src/windows/mini_disassembler.cc           |   16
-rw-r--r--  src/windows/mini_disassembler.h            |    6
-rw-r--r--  src/windows/mini_disassembler_types.h      |   14
-rw-r--r--  src/windows/patch_functions.cc             |    6
-rw-r--r--  src/windows/preamble_patcher.cc            |  446
-rw-r--r--  src/windows/preamble_patcher.h             |  238
-rw-r--r--  src/windows/preamble_patcher_test.cc       |  356
-rw-r--r--  src/windows/preamble_patcher_with_stub.cc  |  170
-rw-r--r--  src/windows/shortproc.asm                  |  167
32 files changed, 1916 insertions, 1359 deletions
diff --git a/NEWS b/NEWS
index 30d8b1d..2290d8e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,14 @@
-== 23 December 2011 ==
+== 24 January 2012 ==
+
+The `google-perftools` Google Code page has been renamed to
+`gperftools`, in preparation for the project being renamed to
+`gperftools`. In the coming weeks, I'll be stepping down as
+maintainer for the perftools project, and as part of that Google is
+relinquishing ownership of the project; it will now be entirely
+community run. The name change reflects that shift. The 'g' in
+'gperftools' stands for 'great'. :-)
+
+=== 23 December 2011 ===
I've just released perftools 1.9.1
@@ -7,7 +17,7 @@ ARM. If you are not compiling on ARM, or have successfully compiled
perftools 1.9, there is no need to upgrade.
-== 22 December 2011 ==
+=== 22 December 2011 ===
I've just released perftools 1.9
diff --git a/configure b/configure
index ce21ebc..c118199 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for google-perftools 1.9.1.
+# Generated by GNU Autoconf 2.65 for google-perftools 1.10.
#
# Report bugs to <opensource@google.com>.
#
@@ -701,8 +701,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='google-perftools'
PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='1.9.1'
-PACKAGE_STRING='google-perftools 1.9.1'
+PACKAGE_VERSION='1.10'
+PACKAGE_STRING='google-perftools 1.10'
PACKAGE_BUGREPORT='opensource@google.com'
PACKAGE_URL=''
@@ -1474,7 +1474,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures google-perftools 1.9.1 to adapt to many kinds of systems.
+\`configure' configures google-perftools 1.10 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1545,7 +1545,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of google-perftools 1.9.1:";;
+ short | recursive ) echo "Configuration of google-perftools 1.10:";;
esac
cat <<\_ACEOF
@@ -1655,7 +1655,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-google-perftools configure 1.9.1
+google-perftools configure 1.10
generated by GNU Autoconf 2.65
Copyright (C) 2009 Free Software Foundation, Inc.
@@ -2240,7 +2240,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by google-perftools $as_me 1.9.1, which was
+It was created by google-perftools $as_me 1.10, which was
generated by GNU Autoconf 2.65. Invocation command line was
$ $0 $@
@@ -2985,7 +2985,7 @@ fi
# Define the identity of the package.
PACKAGE='google-perftools'
- VERSION='1.9.1'
+ VERSION='1.10'
cat >>confdefs.h <<_ACEOF
@@ -17785,7 +17785,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by google-perftools $as_me 1.9.1, which was
+This file was extended by google-perftools $as_me 1.10, which was
generated by GNU Autoconf 2.65. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -17851,7 +17851,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-google-perftools config.status 1.9.1
+google-perftools config.status 1.10
configured by $0, generated by GNU Autoconf 2.65,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index ab25509..eb8ba7c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,7 +4,7 @@
# make sure we're interpreted by some minimal autoconf
AC_PREREQ(2.57)
-AC_INIT(google-perftools, 1.9.1, opensource@google.com)
+AC_INIT(google-perftools, 1.10, opensource@google.com)
# Update this value for every release! (A:B:C will map to foo.so.(A-C).C.B)
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
TCMALLOC_SO_VERSION=3:0:3
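A worked instance of the A:B:C rule in the comment above: TCMALLOC_SO_VERSION=3:0:3 gives A=3, B=0, C=3, so foo.so.(A-C).C.B works out to libtcmalloc.so.0.3.0 (taking libtcmalloc as the illustrative "foo"; every library built from this version-info gets the same suffix).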
diff --git a/doc/cpuprofile.html b/doc/cpuprofile.html
index f029e78..78ee2a6 100644
--- a/doc/cpuprofile.html
+++ b/doc/cpuprofile.html
@@ -50,9 +50,10 @@ for a given run of an executable:</p>
<ol>
<li> <p>Define the environment variable CPUPROFILE to the filename
- to dump the profile to. For instance, to profile
- <code>/usr/local/bin/my_binary_compiled_with_libprofiler_so</code>:</p>
- <pre>% env CPUPROFILE=/tmp/mybin.prof /usr/local/bin/my_binary_compiled_with_libprofiler_so</pre>
+ to dump the profile to. For instance, if you had a version of
+ <code>/bin/ls</code> that had been linked against libprofiler,
+ you could run:</p>
+ <pre>% env CPUPROFILE=ls.prof /bin/ls</pre>
<li> <p>In your code, bracket the code you want profiled in calls to
<code>ProfilerStart()</code> and <code>ProfilerStop()</code>.
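To make step 2 concrete, here is a minimal sketch of the ProfilerStart()/ProfilerStop() bracketing; it assumes the <google/profiler.h> header and the -lprofiler link flag that perftools ships, and the output file name is illustrative. With explicit calls like this, CPUPROFILE need not be set, since the file name is passed directly:

    /* Minimal sketch: explicit CPU-profiler bracketing.
     * Build with: gcc demo.c -lprofiler */
    #include <google/profiler.h>

    static double hot_loop(void) {
      volatile double x = 0;
      int i;
      for (i = 0; i < 100000000; i++) x += i * 0.5;
      return x;
    }

    int main(void) {
      ProfilerStart("mybin.prof");   /* begin collecting samples */
      hot_loop();                    /* only this region is profiled */
      ProfilerStop();                /* flush and close mybin.prof */
      return 0;
    }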
diff --git a/src/base/atomicops-internals-arm-v6plus.h b/src/base/atomicops-internals-arm-v6plus.h
index e197cac..8d5b9b5 100644
--- a/src/base/atomicops-internals-arm-v6plus.h
+++ b/src/base/atomicops-internals-arm-v6plus.h
@@ -67,6 +67,10 @@ inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
"ldrex %1, [%3]\n"
"mov %0, #0\n"
"teq %1, %4\n"
+ // The following IT (if-then) instruction is needed for the subsequent
+ // conditional instruction STREXEQ when compiling in THUMB mode.
+ // In ARM mode, the compiler/assembler will not generate any code for it.
+ "it eq\n"
"strexeq %0, %5, [%3]\n"
: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
: "r" (ptr), "Ir" (old_value), "r" (new_value)
@@ -186,7 +190,12 @@ inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
"ldrexd %1, [%3]\n"
"mov %0, #0\n"
"teq %Q1, %Q4\n"
+ // The following IT (if-then) instructions are needed for the subsequent
+ // conditional instructions when compiling in THUMB mode.
+  // In ARM mode, the compiler/assembler will not generate any code for them.
+ "it eq\n"
"teqeq %R1, %R4\n"
+ "it eq\n"
"strexdeq %0, %5, [%3]\n"
: "=&r" (res), "=&r" (oldval), "+Q" (*ptr)
: "r" (ptr), "Ir" (old_value), "r" (new_value)
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index f52f16e..99dac9e 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -69,6 +69,63 @@
* This file defines a few internal symbols that all start with "LSS_".
* Do not access these symbols from outside this file. They are not part
* of the supported API.
+ *
+ * NOTE: This is a stripped down version of the official opensource
+ * version of linux_syscall_support.h, which lives at
+ * http://code.google.com/p/linux-syscall-support/
+ * It includes only the syscalls that are used in perftools, plus a
+ * few extra. Here's the breakdown:
+ * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
+ * sys__exit(
+ * sys_clone(
+ * sys_close(
+ * sys_fcntl(
+ * sys_fstat(
+ * sys_futex(
+ * sys_futex1(
+ * sys_getcpu(
+ * sys_getdents(
+ * sys_getppid(
+ * sys_gettid(
+ * sys_lseek(
+ * sys_mmap(
+ * sys_mremap(
+ * sys_munmap(
+ * sys_open(
+ * sys_pipe(
+ * sys_prctl(
+ * sys_ptrace(
+ * sys_ptrace_detach(
+ * sys_read(
+ * sys_sched_yield(
+ * sys_sigaction(
+ * sys_sigaltstack(
+ * sys_sigdelset(
+ * sys_sigfillset(
+ * sys_sigprocmask(
+ * sys_socket(
+ * sys_stat(
+ * sys_waitpid(
+ * 2) These are used as subroutines of the above:
+ * sys_getpid -- gettid
+ * sys_kill -- ptrace_detach
+ * sys_restore -- sigaction
+ * sys_restore_rt -- sigaction
+ * sys_socketcall -- socket
+ * sys_wait4 -- waitpid
+ * 3) I left these in even though they're not used. They either
+ * complement the above (write vs read) or are variants (rt_sigaction):
+ * sys_fstat64
+ * sys_getdents64
+ * sys_llseek
+ * sys_mmap2
+ * sys_openat
+ * sys_rt_sigaction
+ * sys_rt_sigprocmask
+ * sys_sigaddset
+ * sys_sigemptyset
+ * sys_stat64
+ * sys_write
*/
#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
#define SYS_LINUX_SYSCALL_SUPPORT_H
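As a usage sketch of the wrappers this stripped-down copy keeps (sys_open, sys_read and sys_close all appear in list 1 above): reading a /proc file without going through libc, which matters inside allocator and profiler code paths where libc may not be safely reentrant. The function name is illustrative:

    /* Sketch: libc-free file read using the retained LSS wrappers. */
    #include <fcntl.h>                 /* O_RDONLY */
    #include "base/linux_syscall_support.h"

    static int slurp_proc_stat(char *buf, int len) {
      int fd = sys_open("/proc/self/stat", O_RDONLY, 0);
      int n;
      if (fd < 0) return -1;
      n = sys_read(fd, buf, len - 1);
      sys_close(fd);
      if (n < 0) return -1;
      buf[n] = '\0';                   /* NUL-terminate for callers */
      return n;
    }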
@@ -154,36 +211,6 @@ struct kernel_dirent {
char d_name[256];
};
-/* include/linux/uio.h */
-struct kernel_iovec {
- void *iov_base;
- unsigned long iov_len;
-};
-
-/* include/linux/socket.h */
-struct kernel_msghdr {
- void *msg_name;
- int msg_namelen;
- struct kernel_iovec*msg_iov;
- unsigned long msg_iovlen;
- void *msg_control;
- unsigned long msg_controllen;
- unsigned msg_flags;
-};
-
-/* include/asm-generic/poll.h */
-struct kernel_pollfd {
- int fd;
- short events;
- short revents;
-};
-
-/* include/linux/resource.h */
-struct kernel_rlimit {
- unsigned long rlim_cur;
- unsigned long rlim_max;
-};
-
/* include/linux/time.h */
struct kernel_timespec {
long tv_sec;
@@ -274,12 +301,6 @@ struct kernel_sigaction {
#endif
};
-/* include/linux/socket.h */
-struct kernel_sockaddr {
- unsigned short sa_family;
- char sa_data[14];
-};
-
/* include/asm-{arm,i386,mips,ppc}/stat.h */
#ifdef __mips__
#if _MIPS_SIM == _MIPS_SIM_ABI64
@@ -449,74 +470,6 @@ struct kernel_stat {
};
#endif
-/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */
-#ifdef __mips__
-#if _MIPS_SIM != _MIPS_SIM_ABI64
-struct kernel_statfs64 {
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long f_frsize;
- unsigned long __pad;
- unsigned long long f_blocks;
- unsigned long long f_bfree;
- unsigned long long f_files;
- unsigned long long f_ffree;
- unsigned long long f_bavail;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_spare[6];
-};
-#endif
-#elif !defined(__x86_64__)
-struct kernel_statfs64 {
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long long f_blocks;
- unsigned long long f_bfree;
- unsigned long long f_bavail;
- unsigned long long f_files;
- unsigned long long f_ffree;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_frsize;
- unsigned long f_spare[5];
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */
-#ifdef __mips__
-struct kernel_statfs {
- long f_type;
- long f_bsize;
- long f_frsize;
- long f_blocks;
- long f_bfree;
- long f_files;
- long f_ffree;
- long f_bavail;
- struct { int val[2]; } f_fsid;
- long f_namelen;
- long f_spare[6];
-};
-#else
-struct kernel_statfs {
- /* x86_64 actually defines all these fields as signed, whereas all other */
- /* platforms define them as unsigned. Leaving them at unsigned should not */
- /* cause any problems. */
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long f_blocks;
- unsigned long f_bfree;
- unsigned long f_bavail;
- unsigned long f_files;
- unsigned long f_ffree;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_frsize;
- unsigned long f_spare[5];
-};
-#endif
-
/* Definitions missing from the standard header files */
#ifndef O_DIRECTORY
@@ -526,12 +479,6 @@ struct kernel_statfs {
#define O_DIRECTORY 0200000
#endif
#endif
-#ifndef NT_PRXFPREG
-#define NT_PRXFPREG 0x46e62b7f
-#endif
-#ifndef PTRACE_GETFPXREGS
-#define PTRACE_GETFPXREGS ((enum __ptrace_request)18)
-#endif
#ifndef PR_GET_DUMPABLE
#define PR_GET_DUMPABLE 3
#endif
@@ -553,46 +500,11 @@ struct kernel_statfs {
#ifndef SA_RESTORER
#define SA_RESTORER 0x04000000
#endif
-#ifndef CPUCLOCK_PROF
-#define CPUCLOCK_PROF 0
-#endif
-#ifndef CPUCLOCK_VIRT
-#define CPUCLOCK_VIRT 1
-#endif
-#ifndef CPUCLOCK_SCHED
-#define CPUCLOCK_SCHED 2
-#endif
-#ifndef CPUCLOCK_PERTHREAD_MASK
-#define CPUCLOCK_PERTHREAD_MASK 4
-#endif
-#ifndef MAKE_PROCESS_CPUCLOCK
-#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
- ((~(int)(pid) << 3) | (int)(clock))
-#endif
-#ifndef MAKE_THREAD_CPUCLOCK
-#define MAKE_THREAD_CPUCLOCK(tid, clock) \
- ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
-#endif
#if defined(__i386__)
-#ifndef __NR_setresuid
-#define __NR_setresuid 164
-#define __NR_setresgid 170
-#endif
#ifndef __NR_rt_sigaction
#define __NR_rt_sigaction 174
#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigsuspend 179
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 180
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 181
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit 191
#endif
#ifndef __NR_stat64
#define __NR_stat64 195
@@ -600,121 +512,43 @@ struct kernel_statfs {
#ifndef __NR_fstat64
#define __NR_fstat64 197
#endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32 208
-#define __NR_setresgid32 210
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#endif
#ifndef __NR_getdents64
#define __NR_getdents64 220
#endif
#ifndef __NR_gettid
#define __NR_gettid 224
#endif
-#ifndef __NR_readahead
-#define __NR_readahead 225
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 226
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 227
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 229
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 230
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 232
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 233
-#endif
#ifndef __NR_futex
#define __NR_futex 240
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 241
-#define __NR_sched_getaffinity 242
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 258
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 265
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 266
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 268
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 269
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64 272
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 289
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 290
-#endif
#ifndef __NR_openat
#define __NR_openat 295
#endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64 300
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 301
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 317
-#endif
#ifndef __NR_getcpu
#define __NR_getcpu 318
#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate 324
-#endif
/* End of i386 definitions */
#elif defined(__arm__)
#ifndef __syscall
#if defined(__thumb__) || defined(__ARM_EABI__)
-#define __SYS_REG(name) register long __sysreg __asm__("r7") = __NR_##name;
-#define __SYS_REG_LIST(regs...) "r" (__sysreg) , ##regs
+#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
+#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
#define __syscall(name) "swi\t0"
+#define __syscall_safe(name) \
+ "push {r7}\n" \
+ "mov r7,%[sysreg]\n" \
+ __syscall(name)"\n" \
+ "pop {r7}"
#else
#define __SYS_REG(name)
#define __SYS_REG_LIST(regs...) regs
#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
+#define __syscall_safe(name) __syscall(name)
#endif
#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_SYSCALL_BASE + 164)
-#define __NR_setresgid (__NR_SYSCALL_BASE + 170)
-#endif
#ifndef __NR_rt_sigaction
#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
-#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176)
-#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_SYSCALL_BASE + 180)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181)
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191)
#endif
#ifndef __NR_stat64
#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
@@ -722,169 +556,35 @@ struct kernel_statfs {
#ifndef __NR_fstat64
#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
#endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208)
-#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210)
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215)
-#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216)
-#endif
#ifndef __NR_getdents64
#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
#endif
#ifndef __NR_gettid
#define __NR_gettid (__NR_SYSCALL_BASE + 224)
#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_SYSCALL_BASE + 225)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_SYSCALL_BASE + 226)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_SYSCALL_BASE + 229)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_SYSCALL_BASE + 232)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_SYSCALL_BASE + 233)
-#endif
#ifndef __NR_futex
#define __NR_futex (__NR_SYSCALL_BASE + 240)
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
-#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_SYSCALL_BASE + 264)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_SYSCALL_BASE + 266)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_SYSCALL_BASE + 344)
-#endif
/* End of ARM definitions */
#elif defined(__x86_64__)
-#ifndef __NR_pread64
-#define __NR_pread64 17
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 18
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid 117
-#define __NR_setresgid 119
-#endif
#ifndef __NR_gettid
#define __NR_gettid 186
#endif
-#ifndef __NR_readahead
-#define __NR_readahead 187
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 188
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 189
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 191
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 192
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 194
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 195
-#endif
#ifndef __NR_futex
#define __NR_futex 202
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 203
-#define __NR_sched_getaffinity 204
-#endif
#ifndef __NR_getdents64
#define __NR_getdents64 217
#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 218
-#endif
-#ifndef __NR_fadvise64
-#define __NR_fadvise64 221
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 228
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 229
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 251
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 252
-#endif
#ifndef __NR_openat
#define __NR_openat 257
#endif
-#ifndef __NR_newfstatat
-#define __NR_newfstatat 262
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 263
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 279
-#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate 285
-#endif
/* End of x86-64 definitions */
#elif defined(__mips__)
#if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 185)
-#define __NR_setresgid (__NR_Linux + 190)
-#endif
#ifndef __NR_rt_sigaction
#define __NR_rt_sigaction (__NR_Linux + 194)
#define __NR_rt_sigprocmask (__NR_Linux + 195)
-#define __NR_rt_sigpending (__NR_Linux + 196)
-#define __NR_rt_sigsuspend (__NR_Linux + 199)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_Linux + 200)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_Linux + 201)
#endif
#ifndef __NR_stat64
#define __NR_stat64 (__NR_Linux + 213)
@@ -898,245 +598,59 @@ struct kernel_statfs {
#ifndef __NR_gettid
#define __NR_gettid (__NR_Linux + 222)
#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 223)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 224)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 225)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 227)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 228)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 230)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 231)
-#endif
#ifndef __NR_futex
#define __NR_futex (__NR_Linux + 238)
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 239)
-#define __NR_sched_getaffinity (__NR_Linux + 240)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 252)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_Linux + 255)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_Linux + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 264)
-#endif
#ifndef __NR_openat
#define __NR_openat (__NR_Linux + 288)
#endif
#ifndef __NR_fstatat
#define __NR_fstatat (__NR_Linux + 293)
#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 294)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 308)
-#endif
#ifndef __NR_getcpu
#define __NR_getcpu (__NR_Linux + 312)
#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 315)
-#endif
/* End of MIPS (old 32bit API) definitions */
#elif _MIPS_SIM == _MIPS_SIM_ABI64
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_Linux + 16)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_Linux + 17)
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 115)
-#define __NR_setresgid (__NR_Linux + 117)
-#endif
#ifndef __NR_gettid
#define __NR_gettid (__NR_Linux + 178)
#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 187)
-#endif
#ifndef __NR_futex
#define __NR_futex (__NR_Linux + 194)
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 195)
-#define __NR_sched_getaffinity (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 212)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 222)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 223)
-#endif
#ifndef __NR_openat
#define __NR_openat (__NR_Linux + 247)
#endif
#ifndef __NR_fstatat
#define __NR_fstatat (__NR_Linux + 252)
#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 253)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 267)
-#endif
#ifndef __NR_getcpu
#define __NR_getcpu (__NR_Linux + 271)
#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 273)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 274)
-#endif
/* End of MIPS (64bit API) definitions */
#else
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 115)
-#define __NR_setresgid (__NR_Linux + 117)
-#endif
#ifndef __NR_gettid
#define __NR_gettid (__NR_Linux + 178)
#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 187)
-#endif
#ifndef __NR_futex
#define __NR_futex (__NR_Linux + 194)
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 195)
-#define __NR_sched_getaffinity (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 213)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_Linux + 217)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_Linux + 218)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 226)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 227)
-#endif
#ifndef __NR_openat
#define __NR_openat (__NR_Linux + 251)
#endif
#ifndef __NR_fstatat
#define __NR_fstatat (__NR_Linux + 256)
#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 257)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 271)
-#endif
#ifndef __NR_getcpu
#define __NR_getcpu (__NR_Linux + 275)
#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 277)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 278)
-#endif
/* End of MIPS (new 32bit API) definitions */
#endif
/* End of MIPS definitions */
#elif defined(__PPC__)
-#ifndef __NR_setfsuid
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid 164
-#define __NR_setresgid 169
-#endif
#ifndef __NR_rt_sigaction
#define __NR_rt_sigaction 173
#define __NR_rt_sigprocmask 174
-#define __NR_rt_sigpending 175
-#define __NR_rt_sigsuspend 178
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 179
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 180
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit 190
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead 191
#endif
#ifndef __NR_stat64
#define __NR_stat64 195
@@ -1150,67 +664,12 @@ struct kernel_statfs {
#ifndef __NR_gettid
#define __NR_gettid 207
#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 209
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 210
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 212
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 213
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 215
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 216
-#endif
#ifndef __NR_futex
#define __NR_futex 221
#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 222
-#define __NR_sched_getaffinity 223
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 232
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 246
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 247
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 252
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 253
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64 254
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 273
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 274
-#endif
#ifndef __NR_openat
#define __NR_openat 286
#endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64 291
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 292
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 301
-#endif
#ifndef __NR_getcpu
#define __NR_getcpu 302
#endif
@@ -1315,6 +774,15 @@ struct kernel_statfs {
} while (0)
#endif
#if defined(__i386__)
+ #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
+ /* This only works for GCC-4.4 and above -- the first version to use
+ .cfi directives for dwarf unwind info. */
+ #define CFI_ADJUST_CFA_OFFSET(adjust) \
+ ".cfi_adjust_cfa_offset " #adjust "\n"
+ #else
+ #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
+ #endif
+
/* In PIC mode (e.g. when building shared libraries), gcc for i386
* reserves ebx. Unfortunately, most distribution ship with implementations
* of _syscallX() which clobber ebx.
@@ -1327,11 +795,13 @@ struct kernel_statfs {
#define LSS_BODY(type,args...) \
long __res; \
__asm__ __volatile__("push %%ebx\n" \
+ CFI_ADJUST_CFA_OFFSET(4) \
"movl %2,%%ebx\n" \
"int $0x80\n" \
- "pop %%ebx" \
+ "pop %%ebx\n" \
+ CFI_ADJUST_CFA_OFFSET(-4) \
args \
- : "memory"); \
+ : "esp", "memory"); \
LSS_RETURN(type,__res)
#undef _syscall0
#define _syscall0(type,name) \
@@ -1388,7 +858,7 @@ struct kernel_statfs {
: "i" (__NR_##name), "ri" ((long)(arg1)), \
"c" ((long)(arg2)), "d" ((long)(arg3)), \
"S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "memory"); \
+ : "esp", "memory"); \
LSS_RETURN(type,__res); \
}
#undef _syscall6
@@ -1410,7 +880,7 @@ struct kernel_statfs {
: "i" (__NR_##name), "0" ((long)(&__s)), \
"c" ((long)(arg2)), "d" ((long)(arg3)), \
"S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "memory"); \
+ : "esp", "memory"); \
LSS_RETURN(type,__res); \
}
LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
@@ -1496,36 +966,10 @@ struct kernel_statfs {
: "0"(-EINVAL), "i"(__NR_clone),
"m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
"m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
- : "memory", "ecx", "edx", "esi", "edi");
+ : "esp", "memory", "ecx", "edx", "esi", "edi");
LSS_RETURN(int, __res);
}
- #define __NR__fadvise64_64 __NR_fadvise64_64
- LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
- unsigned, offset_lo, unsigned, offset_hi,
- unsigned, len_lo, unsigned, len_hi,
- int, advice)
-
- LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
- loff_t len, int advice) {
- return LSS_NAME(_fadvise64_64)(fd,
- (unsigned)offset, (unsigned)(offset >>32),
- (unsigned)len, (unsigned)(len >> 32),
- advice);
- }
-
- #define __NR__fallocate __NR_fallocate
- LSS_INLINE _syscall6(int, _fallocate, int, fd,
- int, mode,
- unsigned, offset_lo, unsigned, offset_hi,
- unsigned, len_lo, unsigned, len_hi)
-
- LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
- loff_t offset, loff_t len) {
- union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
- return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
- }
-
LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
/* On i386, the kernel does not know how to return from a signal
* handler. Instead, it relies on user space to provide a
@@ -1604,7 +1048,7 @@ struct kernel_statfs {
__asm__ __volatile__("movq %5,%%r10; syscall" : \
"=a" (__res) : "0" (__NR_##name), \
"D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "g" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
+ "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
LSS_RETURN(type, __res); \
}
#undef _syscall5
@@ -1616,7 +1060,7 @@ struct kernel_statfs {
__asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
"=a" (__res) : "0" (__NR_##name), \
"D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "g" ((long)(arg4)), "g" ((long)(arg5)) : \
+ "r" ((long)(arg4)), "r" ((long)(arg5)) : \
"r8", "r10", "r11", "rcx", "memory"); \
LSS_RETURN(type, __res); \
}
@@ -1630,7 +1074,7 @@ struct kernel_statfs {
"syscall" : \
"=a" (__res) : "0" (__NR_##name), \
"D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "g" ((long)(arg4)), "g" ((long)(arg5)), "g" ((long)(arg6)) : \
+ "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
"r8", "r9", "r10", "r11", "rcx", "memory"); \
LSS_RETURN(type, __res); \
}
@@ -1639,8 +1083,6 @@ struct kernel_statfs {
void *newtls, int *child_tidptr) {
long __res;
{
- register void *__tls __asm__("r8") = newtls;
- register int *__ctid __asm__("r10") = child_tidptr;
__asm__ __volatile__(/* if (fn == NULL)
* return -EINVAL;
*/
@@ -1653,8 +1095,10 @@ struct kernel_statfs {
"testq %5,%5\n"
"jz 1f\n"
- /* childstack -= 2*sizeof(void *);
+ /* Set up alignment of the child stack:
+ * child_stack = (child_stack & ~0xF) - 16;
*/
+ "andq $-16,%5\n"
"subq $16,%5\n"
/* Push "arg" and "fn" onto the stack that will be
@@ -1671,6 +1115,8 @@ struct kernel_statfs {
* %r10 = child_tidptr)
*/
"movq %2,%%rax\n"
+ "movq %9,%%r8\n"
+ "movq %10,%%r10\n"
"syscall\n"
/* if (%rax != 0)
@@ -1701,13 +1147,11 @@ struct kernel_statfs {
: "=a" (__res)
: "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
"r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
- "d"(parent_tidptr), "r"(__tls), "r"(__ctid)
- : "memory", "r11", "rcx");
+ "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
+ : "rsp", "memory", "r8", "r10", "r11", "rcx");
}
LSS_RETURN(int, __res);
}
- LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
- int, advice)
LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
/* On x86-64, the kernel does not know how to return from
@@ -1735,12 +1179,26 @@ struct kernel_statfs {
*/
#undef LSS_REG
#define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+
+ /* r0..r3 are scratch registers and not preserved across function
+ * calls. We must first evaluate the first 4 syscall arguments
+ * and store them on the stack. They must be loaded into r0..r3 after
+ * all function calls to avoid r0..r3 being clobbered.
+ */
+ #undef LSS_SAVE_ARG
+ #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
+ #undef LSS_LOAD_ARG
+ #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
+
#undef LSS_BODY
- #define LSS_BODY(type,name,args...) \
+ #define LSS_BODY(type, name, args...) \
register long __res_r0 __asm__("r0"); \
long __res; \
- __asm__ __volatile__ (__syscall(name) \
- : "=r"(__res_r0) : args : "lr", "memory"); \
+ __SYS_REG(name) \
+ __asm__ __volatile__ (__syscall_safe(name) \
+ : "=r"(__res_r0) \
+ : __SYS_REG_LIST(args) \
+ : "lr", "memory"); \
__res = __res_r0; \
LSS_RETURN(type, __res)
#undef _syscall0
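The motivation for the LSS_SAVE_ARG/LSS_LOAD_ARG split introduced here: an argument expression may itself contain a function call, and any call may clobber the scratch registers r0..r3, so binding register variables to r0..r3 before every argument has been evaluated is unsafe (the same caution behind __syscall_safe, which saves and restores the EABI syscall-number register r7 around the swi). An ARM-only sketch of the hazard and the fix, with helper() as a hypothetical argument expression containing a call:

    long helper(void);   /* hypothetical: an argument that makes a call */

    long demo(long a) {
      /* Unsafe (old) shape: the value bound to r0 is live while helper()
       * runs and can be clobbered, because r0..r3 are caller-saved:
       *   register long r0v __asm__("r0") = a;
       *   register long r1v __asm__("r1") = helper();
       * Safe (new) shape: evaluate into plain temporaries first, bind the
       * register variables only immediately before the asm statement. */
      long tmp0 = a;           /* what LSS_SAVE_ARG(0, a) does        */
      long tmp1 = helper();    /* what LSS_SAVE_ARG(1, helper()) does */
      register long r0v __asm__("r0") = tmp0;   /* LSS_LOAD_ARG(0) */
      register long r1v __asm__("r1") = tmp1;   /* LSS_LOAD_ARG(1) */
      __asm__ __volatile__("" : : "r"(r0v), "r"(r1v));  /* keep them live */
      return tmp0 + tmp1;
    }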
@@ -1751,77 +1209,126 @@ struct kernel_statfs {
#undef _syscall1
#define _syscall1(type, name, type1, arg1) \
type LSS_NAME(name)(type1 arg1) { \
- LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \
+      /* There is no need to use a volatile temp. */                 \
+ LSS_REG(0, arg1); \
+ LSS_BODY(type, name, "r"(__r0)); \
}
#undef _syscall2
#define _syscall2(type, name, type1, arg1, type2, arg2) \
type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \
}
#undef _syscall3
#define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \
}
#undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \
}
#undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
+ #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
type5 arg5) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); LSS_REG(4, arg5); \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_REG(4, arg5); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
"r"(__r4)); \
}
#undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
+ #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5, type6, arg6) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
type5 arg5, type6 arg6) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \
+ LSS_SAVE_ARG(0, arg1); \
+ LSS_SAVE_ARG(1, arg2); \
+ LSS_SAVE_ARG(2, arg3); \
+ LSS_SAVE_ARG(3, arg4); \
+ LSS_REG(4, arg5); \
+ LSS_REG(5, arg6); \
+ LSS_LOAD_ARG(0); \
+ LSS_LOAD_ARG(1); \
+ LSS_LOAD_ARG(2); \
+ LSS_LOAD_ARG(3); \
LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
"r"(__r4), "r"(__r5)); \
}
LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
int flags, void *arg, int *parent_tidptr,
void *newtls, int *child_tidptr) {
- long __res;
+ register long __res __asm__("r5");
{
- register int __flags __asm__("r0") = flags;
- register void *__stack __asm__("r1") = child_stack;
- register void *__ptid __asm__("r2") = parent_tidptr;
- register void *__tls __asm__("r3") = newtls;
- register int *__ctid __asm__("r4") = child_tidptr;
- __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
- * return -EINVAL;
- */
- "cmp %2,#0\n"
- "cmpne %3,#0\n"
- "moveq %0,%1\n"
- "beq 1f\n"
+ if (fn == NULL || child_stack == NULL) {
+ __res = -EINVAL;
+ goto clone_exit;
+ }
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "str %5,[%3,#-4]!\n"
- "str %2,[%3,#-4]!\n"
+      /* Stash the first 4 arguments on the stack, because we can only load
+ * them after all function calls.
+ */
+ int tmp_flags = flags;
+ int * tmp_stack = (int*) child_stack;
+ void * tmp_ptid = parent_tidptr;
+ void * tmp_tls = newtls;
+
+ register int *__ctid __asm__("r4") = child_tidptr;
- /* %r0 = syscall(%r0 = flags,
+ /* Push "arg" and "fn" onto the stack that will be
+ * used by the child.
+ */
+ *(--tmp_stack) = (int) arg;
+ *(--tmp_stack) = (int) fn;
+
+ /* We must load r0..r3 last after all possible function calls. */
+ register int __flags __asm__("r0") = tmp_flags;
+ register void *__stack __asm__("r1") = tmp_stack;
+ register void *__ptid __asm__("r2") = tmp_ptid;
+ register void *__tls __asm__("r3") = tmp_tls;
+
+ /* %r0 = syscall(%r0 = flags,
+ * %r1 = child_stack,
+ * %r2 = parent_tidptr,
+ * %r3 = newtls,
+ * %r4 = child_tidptr)
+ */
+ __SYS_REG(clone)
+ __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
* %r1 = child_stack,
* %r2 = parent_tidptr,
* %r3 = newtls,
* %r4 = child_tidptr)
*/
+ "push {r7}\n"
+ "mov r7,%1\n"
__syscall(clone)"\n"
/* if (%r0 != 0)
@@ -1836,16 +1343,24 @@ struct kernel_statfs {
"mov lr,pc\n"
"ldr pc,[sp]\n"
- /* Call _exit(%r0).
+ /* Call _exit(%r0), which never returns. We only
+                         * need to set r7 for the EABI syscall ABI, but we
+                         * always do so to simplify code sharing between
+ * old and new syscall ABIs.
*/
+ "mov r7,%2\n"
__syscall(exit)"\n"
- "1:\n"
+
+ /* Pop r7 from the stack only in the parent.
+ */
+ "1: pop {r7}\n"
: "=r" (__res)
- : "i"(-EINVAL),
- "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+ : "r"(__sysreg),
+ "i"(__NR_exit), "r"(__stack), "r"(__flags),
"r"(__ptid), "r"(__tls), "r"(__ctid)
- : "lr", "memory");
+ : "cc", "lr", "memory");
}
+ clone_exit:
LSS_RETURN(int, __res);
}
#elif defined(__mips__)
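The rewritten wrapper keeps the seven-argument signature visible above; a hedged caller sketch (stack size, flag set and function names are illustrative, and clone stacks grow downward, so the high end of the buffer is what gets passed):

    /* Sketch: spawning a thread-like child via the LSS clone wrapper. */
    #include <sched.h>    /* CLONE_* flags */
    #include <signal.h>   /* SIGCHLD */
    #include <stdlib.h>
    #include "base/linux_syscall_support.h"

    static int child_fn(void *arg) {
      (void)arg;
      return 0;   /* the stub emitted by sys_clone then calls _exit(0) */
    }

    int spawn_child(void) {
      enum { kStackSize = 64 * 1024 };
      char *stack = (char *)malloc(kStackSize);
      if (stack == NULL) return -1;
      return sys_clone(child_fn, stack + kStackSize,
                       CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD,
                       NULL,     /* arg           */
                       NULL,     /* parent_tidptr */
                       NULL,     /* newtls        */
                       NULL);    /* child_tidptr  */
    }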
@@ -2263,25 +1778,12 @@ struct kernel_statfs {
#define __NR__exit __NR_exit
#define __NR__gettid __NR_gettid
#define __NR__mremap __NR_mremap
- LSS_INLINE _syscall1(int, chdir, const char *,p)
LSS_INLINE _syscall1(int, close, int, f)
- LSS_INLINE _syscall2(int, clock_getres, int, c,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall2(int, clock_gettime, int, c,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall1(int, dup, int, f)
- LSS_INLINE _syscall2(int, dup2, int, s,
- int, d)
- LSS_INLINE _syscall3(int, execve, const char*, f,
- const char*const*,a,const char*const*, e)
LSS_INLINE _syscall1(int, _exit, int, e)
LSS_INLINE _syscall3(int, fcntl, int, f,
int, c, long, a)
- LSS_INLINE _syscall0(pid_t, fork)
LSS_INLINE _syscall2(int, fstat, int, f,
struct kernel_stat*, b)
- LSS_INLINE _syscall2(int, fstatfs, int, f,
- struct kernel_statfs*, b)
LSS_INLINE _syscall4(int, futex, int*, a,
int, o, int, v,
struct kernel_timespec*, t)
@@ -2291,147 +1793,53 @@ struct kernel_statfs {
LSS_INLINE _syscall3(int, getdents64, int, f,
struct kernel_dirent64*, d, int, c)
#endif
- LSS_INLINE _syscall0(gid_t, getegid)
- LSS_INLINE _syscall0(uid_t, geteuid)
- LSS_INLINE _syscall0(pid_t, getpgrp)
LSS_INLINE _syscall0(pid_t, getpid)
LSS_INLINE _syscall0(pid_t, getppid)
- LSS_INLINE _syscall2(int, getpriority, int, a,
- int, b)
- LSS_INLINE _syscall2(int, getrlimit, int, r,
- struct kernel_rlimit*, l)
- LSS_INLINE _syscall1(pid_t, getsid, pid_t, p)
LSS_INLINE _syscall0(pid_t, _gettid)
- LSS_INLINE _syscall5(int, setxattr, const char *,p,
- const char *, n, const void *,v,
- size_t, s, int, f)
- LSS_INLINE _syscall5(int, lsetxattr, const char *,p,
- const char *, n, const void *,v,
- size_t, s, int, f)
- LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p,
- const char *, n, void *, v, size_t, s)
- LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p,
- const char *, n, void *, v, size_t, s)
- LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p,
- char *, l, size_t, s)
- LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p,
- char *, l, size_t, s)
- LSS_INLINE _syscall2(int, ioprio_get, int, which,
- int, who)
- LSS_INLINE _syscall3(int, ioprio_set, int, which,
- int, who, int, ioprio)
LSS_INLINE _syscall2(int, kill, pid_t, p,
int, s)
LSS_INLINE _syscall3(off_t, lseek, int, f,
off_t, o, int, w)
LSS_INLINE _syscall2(int, munmap, void*, s,
size_t, l)
- LSS_INLINE _syscall6(long, move_pages, pid_t, p,
- unsigned long, n, void **,g, int *, d,
- int *, s, int, f)
LSS_INLINE _syscall5(void*, _mremap, void*, o,
size_t, os, size_t, ns,
unsigned long, f, void *, a)
LSS_INLINE _syscall3(int, open, const char*, p,
int, f, int, m)
- LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u,
- unsigned int, n, int, t)
LSS_INLINE _syscall2(int, prctl, int, o,
long, a)
LSS_INLINE _syscall4(long, ptrace, int, r,
pid_t, p, void *, a, void *, d)
LSS_INLINE _syscall3(ssize_t, read, int, f,
void *, b, size_t, c)
- LSS_INLINE _syscall3(int, readlink, const char*, p,
- char*, b, size_t, s)
LSS_INLINE _syscall4(int, rt_sigaction, int, s,
const struct kernel_sigaction*, a,
struct kernel_sigaction*, o, size_t, c)
- LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s,
- size_t, c)
LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
const struct kernel_sigset_t*, s,
struct kernel_sigset_t*, o, size_t, c);
- LSS_INLINE _syscall2(int, rt_sigsuspend,
- const struct kernel_sigset_t*, s, size_t, c);
- LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p,
- unsigned int, l, unsigned long *, m)
- LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p,
- unsigned int, l, unsigned long *, m)
LSS_INLINE _syscall0(int, sched_yield)
- LSS_INLINE _syscall1(long, set_tid_address, int *, t)
- LSS_INLINE _syscall1(int, setfsgid, gid_t, g)
- LSS_INLINE _syscall1(int, setfsuid, uid_t, u)
- LSS_INLINE _syscall1(int, setuid, uid_t, u)
- LSS_INLINE _syscall1(int, setgid, gid_t, g)
- LSS_INLINE _syscall2(int, setpgid, pid_t, p,
- pid_t, g)
- LSS_INLINE _syscall3(int, setpriority, int, a,
- int, b, int, p)
- LSS_INLINE _syscall3(int, setresgid, gid_t, r,
- gid_t, e, gid_t, s)
- LSS_INLINE _syscall3(int, setresuid, uid_t, r,
- uid_t, e, uid_t, s)
- LSS_INLINE _syscall2(int, setrlimit, int, r,
- const struct kernel_rlimit*, l)
- LSS_INLINE _syscall0(pid_t, setsid)
LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s,
const stack_t*, o)
LSS_INLINE _syscall2(int, stat, const char*, f,
struct kernel_stat*, b)
- LSS_INLINE _syscall2(int, statfs, const char*, f,
- struct kernel_statfs*, b)
LSS_INLINE _syscall3(ssize_t, write, int, f,
const void *, b, size_t, c)
- LSS_INLINE _syscall3(ssize_t, writev, int, f,
- const struct kernel_iovec*, v, size_t, c)
#if defined(__NR_getcpu)
LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
unsigned *, node, void *, unused);
#endif
#if defined(__x86_64__) || \
(defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
- LSS_INLINE _syscall3(int, recvmsg, int, s,
- struct kernel_msghdr*, m, int, f)
- LSS_INLINE _syscall3(int, sendmsg, int, s,
- const struct kernel_msghdr*, m, int, f)
- LSS_INLINE _syscall6(int, sendto, int, s,
- const void*, m, size_t, l,
- int, f,
- const struct kernel_sockaddr*, a, int, t)
- LSS_INLINE _syscall2(int, shutdown, int, s,
- int, h)
LSS_INLINE _syscall3(int, socket, int, d,
int, t, int, p)
- LSS_INLINE _syscall4(int, socketpair, int, d,
- int, t, int, p, int*, s)
#endif
#if defined(__x86_64__)
- LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode,
- loff_t, offset, loff_t, len)
LSS_INLINE _syscall6(void*, mmap, void*, s,
size_t, l, int, p,
int, f, int, d,
__off64_t, o)
- LSS_INLINE _syscall4(int, newfstatat, int, d,
- const char *, p,
- struct kernel_stat*, b, int, f)
-
- LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
- return LSS_NAME(setfsgid)(gid);
- }
-
- LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
- return LSS_NAME(setfsuid)(uid);
- }
-
- LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
- return LSS_NAME(setresgid)(rgid, egid, sgid);
- }
-
- LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
- return LSS_NAME(setresuid)(ruid, euid, suid);
- }
LSS_INLINE int LSS_NAME(sigaction)(int signum,
const struct kernel_sigaction *act,
@@ -2453,117 +1861,35 @@ struct kernel_statfs {
}
}
- LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
- return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
- }
-
LSS_INLINE int LSS_NAME(sigprocmask)(int how,
const struct kernel_sigset_t *set,
struct kernel_sigset_t *oldset) {
return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
}
-
- LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
- return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
- }
#endif
#if defined(__x86_64__) || \
defined(__arm__) || \
(defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
LSS_INLINE _syscall4(pid_t, wait4, pid_t, p,
int*, s, int, o,
- struct kernel_rusage*, r)
-
+ struct kernel_rusage*, r)
LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
return LSS_NAME(wait4)(pid, status, options, 0);
}
- #endif
- #if defined(__i386__) || defined(__x86_64__)
+ #endif
+ #if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
- LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f)
- #endif
- #if defined(__i386__) || defined(__arm__)
- #define __NR__getresgid32 __NR_getresgid32
- #define __NR__getresuid32 __NR_getresuid32
- #define __NR__setfsgid32 __NR_setfsgid32
- #define __NR__setfsuid32 __NR_setfsuid32
- #define __NR__setresgid32 __NR_setresgid32
- #define __NR__setresuid32 __NR_setresuid32
- LSS_INLINE _syscall2(int, ugetrlimit, int, r,
- struct kernel_rlimit*, l)
- LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f)
- LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f)
- LSS_INLINE _syscall3(int, _setresgid32, gid_t, r,
- gid_t, e, gid_t, s)
- LSS_INLINE _syscall3(int, _setresuid32, uid_t, r,
- uid_t, e, uid_t, s)
-
- LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
- int rc;
- if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)gid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setfsgid)(gid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
- int rc;
- if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)uid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setfsuid)(uid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
- int rc;
- if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)rgid & ~0xFFFFu ||
- (unsigned int)egid & ~0xFFFFu ||
- (unsigned int)sgid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setresgid)(rgid, egid, sgid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
- int rc;
- if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)ruid & ~0xFFFFu ||
- (unsigned int)euid & ~0xFFFFu ||
- (unsigned int)suid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setresuid)(ruid, euid, suid);
- }
- }
- return rc;
- }
#endif
LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
memset(&set->sig, 0, sizeof(set->sig));
return 0;
}
-
+
LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
memset(&set->sig, -1, sizeof(set->sig));
return 0;
}
-
+
LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
int signum) {
if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
@@ -2575,7 +1901,7 @@ struct kernel_statfs {
return 0;
}
}
-
+
LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
int signum) {
if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
@@ -2587,30 +1913,26 @@ struct kernel_statfs {
return 0;
}
}
-
- LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] &
- (1UL << ((signum - 1) % (8*sizeof(set->sig[0])))));
- }
- }
+
#if defined(__i386__) || \
defined(__arm__) || \
(defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
#define __NR__sigaction __NR_sigaction
- #define __NR__sigpending __NR_sigpending
#define __NR__sigprocmask __NR_sigprocmask
- #define __NR__sigsuspend __NR_sigsuspend
LSS_INLINE _syscall2(int, fstat64, int, f,
struct kernel_stat64 *, b)
LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo,
loff_t *, res, uint, wh)
+#ifdef __PPC64__
+ LSS_INLINE _syscall6(void*, mmap, void*, s,
+ size_t, l, int, p,
+ int, f, int, d,
+ off_t, o)
+#else
+ #ifndef __ARM_EABI__
+ /* Not available on ARM EABI Linux. */
LSS_INLINE _syscall1(void*, mmap, void*, a)
-#ifndef __PPC64__
+ #endif
LSS_INLINE _syscall6(void*, mmap2, void*, s,
size_t, l, int, p,
int, f, int, d,
@@ -2619,17 +1941,9 @@ struct kernel_statfs {
LSS_INLINE _syscall3(int, _sigaction, int, s,
const struct kernel_old_sigaction*, a,
struct kernel_old_sigaction*, o)
- LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s)
LSS_INLINE _syscall3(int, _sigprocmask, int, h,
const unsigned long*, s,
unsigned long*, o)
- #ifdef __PPC__
- LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s)
- #else
- LSS_INLINE _syscall3(int, _sigsuspend, const void*, a,
- int, b,
- unsigned long, s)
- #endif
LSS_INLINE _syscall2(int, stat64, const char *, p,
struct kernel_stat64 *, b)
@@ -2695,17 +2009,6 @@ struct kernel_statfs {
return rc;
}
- LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
- int old_errno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = old_errno;
- LSS_NAME(sigemptyset)(set);
- rc = LSS_NAME(_sigpending)(&set->sig[0]);
- }
- return rc;
- }
-
LSS_INLINE int LSS_NAME(sigprocmask)(int how,
const struct kernel_sigset_t *set,
struct kernel_sigset_t *oldset) {
@@ -2722,20 +2025,6 @@ struct kernel_statfs {
}
return rc;
}
-
- LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
- int olderrno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = olderrno;
- rc = LSS_NAME(_sigsuspend)(
- #ifndef __PPC__
- set, 0,
- #endif
- set->sig[0]);
- }
- return rc;
- }
#endif
#if defined(__PPC__)
#undef LSS_SC_LOADARGS_0
@@ -2792,93 +2081,20 @@ struct kernel_statfs {
} \
LSS_RETURN(type, __sc_ret, __sc_err)
- LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
- int flags){
- LSS_SC_BODY(3, ssize_t, 17, s, msg, flags);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
- const struct kernel_msghdr *msg,
- int flags) {
- LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
- }
-
- // TODO(csilvers): why is this ifdef'ed out?
-#if 0
- LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
- int flags,
- const struct kernel_sockaddr *to,
- unsigned int tolen) {
- LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
- }
-#endif
-
- LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
- LSS_SC_BODY(2, int, 13, s, how);
- }
-
LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
LSS_SC_BODY(3, int, 1, domain, type, protocol);
}
-
- LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
- int sv[2]) {
- LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
- }
#endif
#if defined(__i386__) || \
- defined(__arm__) || \
+ (defined(__arm__) && !defined(__ARM_EABI__)) || \
(defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+
/* See sys_socketcall in net/socket.c in kernel source.
* It de-multiplexes on its first arg and unpacks the arglist
* array in its second arg.
*/
LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
- LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
- int flags){
- unsigned long args[3] = {
- (unsigned long) s,
- (unsigned long) msg,
- (unsigned long) flags
- };
- return (ssize_t) LSS_NAME(socketcall)(17, args);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
- const struct kernel_msghdr *msg,
- int flags) {
- unsigned long args[3] = {
- (unsigned long) s,
- (unsigned long) msg,
- (unsigned long) flags
- };
- return (ssize_t) LSS_NAME(socketcall)(16, args);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
- int flags,
- const struct kernel_sockaddr *to,
- unsigned int tolen) {
- unsigned long args[6] = {
- (unsigned long) s,
- (unsigned long) buf,
- (unsigned long) len,
- (unsigned long) flags,
- (unsigned long) to,
- (unsigned long) tolen
- };
- return (ssize_t) LSS_NAME(socketcall)(11, args);
- }
-
- LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
- unsigned long args[2] = {
- (unsigned long) s,
- (unsigned long) how
- };
- return LSS_NAME(socketcall)(13, args);
- }
-
LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
unsigned long args[3] = {
(unsigned long) domain,
@@ -2887,22 +2103,9 @@ struct kernel_statfs {
};
return LSS_NAME(socketcall)(1, args);
}
-
- LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
- int sv[2]) {
- unsigned long args[4] = {
- (unsigned long) d,
- (unsigned long) type,
- (unsigned long) protocol,
- (unsigned long) sv
- };
- return LSS_NAME(socketcall)(8, args);
- }
- #endif
- #if defined(__i386__) || defined(__PPC__)
- LSS_INLINE _syscall4(int, fstatat64, int, d,
- const char *, p,
- struct kernel_stat64 *, b, int, f)
+ #elif defined(__ARM_EABI__)
+ LSS_INLINE _syscall3(int, socket, int, d,
+ int, t, int, p)
#endif
#if defined(__i386__) || defined(__PPC__) || \
(defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
@@ -2934,29 +2137,6 @@ struct kernel_statfs {
#else
LSS_INLINE _syscall1(int, pipe, int *, p)
#endif
- /* TODO(csilvers): see if ppc can/should support this as well */
- #if defined(__i386__) || \
- defined(__arm__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
- #define __NR__statfs64 __NR_statfs64
- #define __NR__fstatfs64 __NR_fstatfs64
- LSS_INLINE _syscall3(int, _statfs64, const char*, p,
- size_t, s,struct kernel_statfs64*, b)
- LSS_INLINE _syscall3(int, _fstatfs64, int, f,
- size_t, s,struct kernel_statfs64*, b)
- LSS_INLINE int LSS_NAME(statfs64)(const char *p,
- struct kernel_statfs64 *b) {
- return LSS_NAME(_statfs64)(p, sizeof(*b), b);
- }
- LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) {
- return LSS_NAME(_fstatfs64)(f, sizeof(*b), b);
- }
- #endif
-
- LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) {
- extern char **environ;
- return LSS_NAME(execve)(path, argv, (const char *const *)environ);
- }
LSS_INLINE pid_t LSS_NAME(gettid)() {
pid_t tid = LSS_NAME(_gettid)();
@@ -2993,72 +2173,6 @@ struct kernel_statfs {
LSS_ERRNO = err;
return rc;
}
-
- LSS_INLINE int LSS_NAME(raise)(int sig) {
- return LSS_NAME(kill)(LSS_NAME(getpid)(), sig);
- }
-
- LSS_INLINE int LSS_NAME(setpgrp)() {
- return LSS_NAME(setpgid)(0, 0);
- }
-
- LSS_INLINE int LSS_NAME(sysconf)(int name) {
- extern int __getpagesize(void);
- switch (name) {
- case _SC_OPEN_MAX: {
- struct kernel_rlimit limit;
- return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0
- ? 8192 : limit.rlim_cur;
- }
- case _SC_PAGESIZE:
- return __getpagesize();
- default:
- errno = ENOSYS;
- return -1;
- }
- }
- #if defined(__x86_64__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
- LSS_INLINE _syscall4(ssize_t, pread64, int, f,
- void *, b, size_t, c,
- loff_t, o)
- LSS_INLINE _syscall4(ssize_t, pwrite64, int, f,
- const void *, b, size_t, c,
- loff_t, o)
- LSS_INLINE _syscall3(int, readahead, int, f,
- loff_t, o, unsigned, c)
- #else
- #define __NR__pread64 __NR_pread64
- #define __NR__pwrite64 __NR_pwrite64
- #define __NR__readahead __NR_readahead
- LSS_INLINE _syscall5(ssize_t, _pread64, int, f,
- void *, b, size_t, c, unsigned, o1,
- unsigned, o2)
- LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f,
- const void *, b, size_t, c, unsigned, o1,
- long, o2)
- LSS_INLINE _syscall4(int, _readahead, int, f,
- unsigned, o1, unsigned, o2, size_t, c);
- /* We force 64bit-wide parameters onto the stack, then access each
- * 32-bit component individually. This guarantees that we build the
- * correct parameters independent of the native byte-order of the
- * underlying architecture.
- */
- LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
- loff_t off) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
- }
- LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
- size_t count, loff_t off) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
- }
- LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
- }
- #endif
#endif
#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
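
An aside on the socketcall path retained above: on these ABIs the kernel has
no separate socket(2) entry point, so socket calls funnel through
sys_socketcall's demultiplexer. A minimal sketch, not part of the patch, of
what the surviving wrapper amounts to; AF_INET and SOCK_STREAM are the usual
<sys/socket.h> constants, and demux code 1 selects socket() per net/socket.c:

    unsigned long args[3] = { AF_INET, SOCK_STREAM, 0 };
    // Equivalent to socket(AF_INET, SOCK_STREAM, 0) on these ABIs.
    long fd = LSS_NAME(socketcall)(1, args);
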
diff --git a/src/base/stl_allocator.h b/src/base/stl_allocator.h
index 3152cf9..8276a83 100644
--- a/src/base/stl_allocator.h
+++ b/src/base/stl_allocator.h
@@ -87,6 +87,7 @@ class STL_Allocator {
size_type max_size() const { return size_t(-1) / sizeof(T); }
void construct(pointer p, const T& val) { ::new(p) T(val); }
+ void construct(pointer p) { ::new(p) T(); }
void destroy(pointer p) { p->~T(); }
// There's no state, so these allocators are always equal
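
Why the extra overload: some container code default-constructs elements in
place via a zero-argument construct(p), a common pre-C++11 allocator
extension, and STL_Allocator previously offered only the copy-construct form.
A hedged sketch of the two forms side by side, assuming STL_Allocator's usual
<T, Alloc> template parameters (MyBackingAlloc is hypothetical):

    STL_Allocator<int, MyBackingAlloc> a;
    int* p = a.allocate(1);
    a.construct(p);      // the new overload: value-initializes via new(p) T()
    a.destroy(p);
    a.deallocate(p, 1);
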
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index 5396743..2559158 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -86,12 +86,20 @@
// time, so prefer making the syscalls directly if we can.
#ifdef HAVE_SYS_SYSCALL_H
# include <sys/syscall.h>
+#endif
+#ifdef SYS_open // solaris 11, at least sometimes, only defines SYS_openat
# define safeopen(filename, mode) syscall(SYS_open, filename, mode)
-# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size)
-# define safeclose(fd) syscall(SYS_close, fd)
#else
# define safeopen(filename, mode) open(filename, mode)
+#endif
+#ifdef SYS_read
+# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size)
+#else
# define saferead(fd, buffer, size) read(fd, buffer, size)
+#endif
+#ifdef SYS_close
+# define safeclose(fd) syscall(SYS_close, fd)
+#else
# define safeclose(fd) close(fd)
#endif
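
The net effect is that each macro now degrades to its libc equivalent
independently, instead of all three being tied together. A sketch of the
usage pattern these macros serve in this file (reading a /proc entry without
touching the heap; O_RDONLY comes from <fcntl.h>):

    int fd = safeopen("/proc/self/maps", O_RDONLY);
    if (fd >= 0) {
      char buf[256];
      ssize_t n = saferead(fd, buf, sizeof(buf));  // may be a raw syscall
      safeclose(fd);
    }
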
diff --git a/src/debugallocation.cc b/src/debugallocation.cc
index d65b13e..8ecb351 100644
--- a/src/debugallocation.cc
+++ b/src/debugallocation.cc
@@ -742,7 +742,7 @@ class MallocBlock {
return FromRawPointer(const_cast<void*>(p));
}
- void Check(int type) {
+ void Check(int type) const {
alloc_map_lock_.Lock();
CheckLocked(type);
alloc_map_lock_.Unlock();
@@ -1015,17 +1015,17 @@ class DebugMallocImplementation : public TCMallocImplementation {
return result;
}
- virtual bool VerifyNewMemory(void* p) {
+ virtual bool VerifyNewMemory(const void* p) {
if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType);
return true;
}
- virtual bool VerifyArrayNewMemory(void* p) {
+ virtual bool VerifyArrayNewMemory(const void* p) {
if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType);
return true;
}
- virtual bool VerifyMallocMemory(void* p) {
+ virtual bool VerifyMallocMemory(const void* p) {
if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType);
return true;
}
@@ -1043,7 +1043,7 @@ class DebugMallocImplementation : public TCMallocImplementation {
return size;
}
- virtual size_t GetAllocatedSize(void* p) {
+ virtual size_t GetAllocatedSize(const void* p) {
if (p) {
RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned,
"ptr not allocated by tcmalloc");
diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h
index 4b06b2d..5bee019 100644
--- a/src/google/malloc_extension.h
+++ b/src/google/malloc_extension.h
@@ -100,10 +100,9 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// See "verify_memory.h" to see what these routines do
virtual bool VerifyAllMemory();
- // TODO(csilvers): change these to const void*.
- virtual bool VerifyNewMemory(void* p);
- virtual bool VerifyArrayNewMemory(void* p);
- virtual bool VerifyMallocMemory(void* p);
+ virtual bool VerifyNewMemory(const void* p);
+ virtual bool VerifyArrayNewMemory(const void* p);
+ virtual bool VerifyMallocMemory(const void* p);
virtual bool MallocMemoryStats(int* blocks, size_t* total,
int histogram[kMallocHistogramSize]);
@@ -281,8 +280,7 @@ class PERFTOOLS_DLL_DECL MallocExtension {
// will return 0.)
// This is equivalent to malloc_size() in OS X, malloc_usable_size()
// in glibc, and _msize() for windows.
- // TODO(csilvers): change to const void*.
- virtual size_t GetAllocatedSize(void* p);
+ virtual size_t GetAllocatedSize(const void* p);
// Returns kOwned if this malloc implementation allocated the memory
// pointed to by p, or kNotOwned if some other malloc implementation
diff --git a/src/google/malloc_extension_c.h b/src/google/malloc_extension_c.h
index e3f7f79..72a0a7c 100644
--- a/src/google/malloc_extension_c.h
+++ b/src/google/malloc_extension_c.h
@@ -58,9 +58,9 @@ extern "C" {
#define kMallocExtensionHistogramSize 64
PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p);
PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total,
int histogram[kMallocExtensionHistogramSize]);
PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length);
@@ -78,7 +78,7 @@ PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes);
PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p);
+PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p);
/*
* NOTE: These enum values MUST be kept in sync with the version in
diff --git a/src/heap-checker.cc b/src/heap-checker.cc
index 1c1cf30..5cb582d 100644
--- a/src/heap-checker.cc
+++ b/src/heap-checker.cc
@@ -784,6 +784,8 @@ static void MakeDisabledLiveCallbackLocked(
}
}
+static const char kUnnamedProcSelfMapEntry[] = "UNNAMED";
+
// This function takes some fields from a /proc/self/maps line:
//
// start_address start address of a memory region.
@@ -801,7 +803,9 @@ static void RecordGlobalDataLocked(uintptr_t start_address,
RAW_DCHECK(heap_checker_lock.IsHeld(), "");
// Ignore non-writeable regions.
if (strchr(permissions, 'w') == NULL) return;
- if (filename == NULL || *filename == '\0') filename = "UNNAMED";
+ if (filename == NULL || *filename == '\0') {
+ filename = kUnnamedProcSelfMapEntry;
+ }
RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
filename, start_address, end_address);
(*library_live_objects)[filename].
@@ -1405,6 +1409,31 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
}
}
if (size < sizeof(void*)) continue;
+
+#ifdef NO_FRAME_POINTER
+ // Frame pointer omission requires us to use libunwind, which uses direct
+ // mmap and munmap system calls, and that needs special handling.
+ if (name2 == kUnnamedProcSelfMapEntry) {
+ static const uintptr_t page_mask = ~(getpagesize() - 1);
+ const uintptr_t addr = reinterpret_cast<uintptr_t>(object);
+ if ((addr & page_mask) == addr && (size & page_mask) == size) {
+ // This is an object we slurped from /proc/self/maps.
+ // It may or may not be readable at this point.
+ //
+ // In case the conditions above are mistaken and the object is
+ // actually unrelated to libunwind, we also verify that it is
+ // unreadable before ignoring it.
+ if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) {
+ // Skip unreadable object, so we don't crash trying to sweep it.
+ RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) "
+ "(msync error %d (%s))",
+ object, object + size, errno, strerror(errno));
+ continue;
+ }
+ }
+ }
+#endif
+
const char* const max_object = object + size - sizeof(void*);
while (object <= max_object) {
// potentially unaligned load:
@@ -1986,6 +2015,16 @@ void HeapLeakChecker_InternalInitStart() {
RAW_LOG(FATAL, "Unsupported heap_check flag: %s",
FLAGS_heap_check.c_str());
}
+ // FreeBSD doesn't seem to honor atexit execution order:
+ // http://code.google.com/p/gperftools/issues/detail?id=375
+ // Since heap-checking before destructors depends on atexit running
+ // at the right time, on FreeBSD we always check after, even in the
+ // less strict modes. This just means FreeBSD is always a bit
+ // stricter in its checking than other OSes.
+#ifdef __FreeBSD__
+ FLAGS_heap_check_after_destructors = true;
+#endif
+
{ SpinLockHolder l(&heap_checker_lock);
RAW_DCHECK(heap_checker_pid == getpid(), "");
heap_checker_on = true;
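
On the msync() probe in the NO_FRAME_POINTER hunk above: POSIX specifies that
msync() fails with ENOMEM when the range includes unmapped pages, so it
doubles as a cheap "is this mapped?" test without risking a fault. A minimal
sketch of the idiom, assuming a page-aligned range:

    #include <sys/mman.h>
    // True if [addr, addr+len) looks mapped; MS_ASYNC makes the call
    // effectively a no-op for pages that really are mapped.
    static bool ProbablyAccessible(char* addr, size_t len) {
      return msync(addr, len, MS_ASYNC) == 0;
    }
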
diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc
index e85b16e..1d79924 100644
--- a/src/heap-profile-table.cc
+++ b/src/heap-profile-table.cc
@@ -63,6 +63,7 @@
#include "symbolize.h"
#include <google/stacktrace.h>
#include <google/malloc_hook.h>
+#include "memory_region_map.h"
#include "base/commandlineflags.h"
#include "base/logging.h" // for the RawFD I/O commands
#include "base/sysinfo.h"
@@ -98,7 +99,8 @@ const char HeapProfileTable::kFileExt[] = ".heap";
//----------------------------------------------------------------------
-static const int kHashTableSize = 179999; // Size for table_.
+// Size for alloc_table_ and mmap_table_.
+static const int kHashTableSize = 179999;
/*static*/ const int HeapProfileTable::kMaxStackDepth;
//----------------------------------------------------------------------
@@ -122,38 +124,60 @@ static bool ByAllocatedSpace(HeapProfileTable::Stats* a,
HeapProfileTable::HeapProfileTable(Allocator alloc, DeAllocator dealloc)
: alloc_(alloc), dealloc_(dealloc) {
- // Make the table
- const int table_bytes = kHashTableSize * sizeof(*table_);
- table_ = reinterpret_cast<Bucket**>(alloc_(table_bytes));
- memset(table_, 0, table_bytes);
- // Make allocation map
- allocation_ =
- new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
- // init the rest:
+ // Initialize the overall profile stats.
memset(&total_, 0, sizeof(total_));
- num_buckets_ = 0;
+
+ // Make the malloc table.
+ const int alloc_table_bytes = kHashTableSize * sizeof(*alloc_table_);
+ alloc_table_ = reinterpret_cast<Bucket**>(alloc_(alloc_table_bytes));
+ memset(alloc_table_, 0, alloc_table_bytes);
+ num_alloc_buckets_ = 0;
+
+ // Initialize the mmap table.
+ mmap_table_ = NULL;
+ num_available_mmap_buckets_ = 0;
+
+ // Make malloc and mmap allocation maps.
+ alloc_address_map_ =
+ new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
+ mmap_address_map_ = NULL;
}
HeapProfileTable::~HeapProfileTable() {
- // free allocation map
- allocation_->~AllocationMap();
- dealloc_(allocation_);
- allocation_ = NULL;
- // free hash table
- for (int b = 0; b < kHashTableSize; b++) {
- for (Bucket* x = table_[b]; x != 0; /**/) {
- Bucket* b = x;
- x = x->next;
- dealloc_(b->stack);
- dealloc_(b);
+ DeallocateBucketTable(alloc_table_);
+ alloc_table_ = NULL;
+ DeallocateBucketTable(mmap_table_);
+ mmap_table_ = NULL;
+ DeallocateAllocationMap(alloc_address_map_);
+ alloc_address_map_ = NULL;
+ DeallocateAllocationMap(mmap_address_map_);
+ mmap_address_map_ = NULL;
+}
+
+void HeapProfileTable::DeallocateAllocationMap(AllocationMap* allocation) {
+ if (allocation != NULL) {
+ allocation->~AllocationMap();
+ dealloc_(allocation);
+ }
+}
+
+void HeapProfileTable::DeallocateBucketTable(Bucket** table) {
+ if (table != NULL) {
+ for (int b = 0; b < kHashTableSize; b++) {
+ for (Bucket* x = table[b]; x != 0; /**/) {
+ Bucket* b = x;
+ x = x->next;
+ dealloc_(b->stack);
+ dealloc_(b);
+ }
}
+ dealloc_(table);
}
- dealloc_(table_);
- table_ = NULL;
}
-HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
- const void* const key[]) {
+HeapProfileTable::Bucket* HeapProfileTable::GetBucket(
+ int depth, const void* const key[], Bucket** table,
+ int* bucket_count) {
// Make hash-value
uintptr_t h = 0;
for (int i = 0; i < depth; i++) {
@@ -166,7 +190,7 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
// Lookup stack trace in table
unsigned int buck = ((unsigned int) h) % kHashTableSize;
- for (Bucket* b = table_[buck]; b != 0; b = b->next) {
+ for (Bucket* b = table[buck]; b != 0; b = b->next) {
if ((b->hash == h) &&
(b->depth == depth) &&
equal(key, key + depth, b->stack)) {
@@ -183,9 +207,11 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
b->hash = h;
b->depth = depth;
b->stack = kcopy;
- b->next = table_[buck];
- table_[buck] = b;
- num_buckets_++;
+ b->next = table[buck];
+ table[buck] = b;
+ if (bucket_count != NULL) {
+ ++(*bucket_count);
+ }
return b;
}
@@ -198,7 +224,8 @@ int HeapProfileTable::GetCallerStackTrace(
void HeapProfileTable::RecordAlloc(
const void* ptr, size_t bytes, int stack_depth,
const void* const call_stack[]) {
- Bucket* b = GetBucket(stack_depth, call_stack);
+ Bucket* b = GetBucket(stack_depth, call_stack, alloc_table_,
+ &num_alloc_buckets_);
b->allocs++;
b->alloc_size += bytes;
total_.allocs++;
@@ -207,12 +234,12 @@ void HeapProfileTable::RecordAlloc(
AllocValue v;
v.set_bucket(b); // also did set_live(false); set_ignore(false)
v.bytes = bytes;
- allocation_->Insert(ptr, v);
+ alloc_address_map_->Insert(ptr, v);
}
void HeapProfileTable::RecordFree(const void* ptr) {
AllocValue v;
- if (allocation_->FindAndRemove(ptr, &v)) {
+ if (alloc_address_map_->FindAndRemove(ptr, &v)) {
Bucket* b = v.bucket();
b->frees++;
b->free_size += v.bytes;
@@ -222,14 +249,14 @@ void HeapProfileTable::RecordFree(const void* ptr) {
}
bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
- const AllocValue* alloc_value = allocation_->Find(ptr);
+ const AllocValue* alloc_value = alloc_address_map_->Find(ptr);
if (alloc_value != NULL) *object_size = alloc_value->bytes;
return alloc_value != NULL;
}
bool HeapProfileTable::FindAllocDetails(const void* ptr,
AllocInfo* info) const {
- const AllocValue* alloc_value = allocation_->Find(ptr);
+ const AllocValue* alloc_value = alloc_address_map_->Find(ptr);
if (alloc_value != NULL) {
info->object_size = alloc_value->bytes;
info->call_stack = alloc_value->bucket()->stack;
@@ -243,13 +270,13 @@ bool HeapProfileTable::FindInsideAlloc(const void* ptr,
const void** object_ptr,
size_t* object_size) const {
const AllocValue* alloc_value =
- allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
+ alloc_address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
if (alloc_value != NULL) *object_size = alloc_value->bytes;
return alloc_value != NULL;
}
bool HeapProfileTable::MarkAsLive(const void* ptr) {
- AllocValue* alloc = allocation_->FindMutable(ptr);
+ AllocValue* alloc = alloc_address_map_->FindMutable(ptr);
if (alloc && !alloc->live()) {
alloc->set_live(true);
return true;
@@ -258,7 +285,7 @@ bool HeapProfileTable::MarkAsLive(const void* ptr) {
}
void HeapProfileTable::MarkAsIgnored(const void* ptr) {
- AllocValue* alloc = allocation_->FindMutable(ptr);
+ AllocValue* alloc = alloc_address_map_->FindMutable(ptr);
if (alloc) {
alloc->set_ignore(true);
}
@@ -299,27 +326,81 @@ int HeapProfileTable::UnparseBucket(const Bucket& b,
HeapProfileTable::Bucket**
HeapProfileTable::MakeSortedBucketList() const {
- Bucket** list =
- reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
+ Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) *
+ (num_alloc_buckets_ + num_available_mmap_buckets_)));
+
+ RAW_DCHECK(mmap_table_ != NULL || num_available_mmap_buckets_ == 0, "");
int n = 0;
+
for (int b = 0; b < kHashTableSize; b++) {
- for (Bucket* x = table_[b]; x != 0; x = x->next) {
+ for (Bucket* x = alloc_table_[b]; x != 0; x = x->next) {
list[n++] = x;
}
}
- RAW_DCHECK(n == num_buckets_, "");
+ RAW_DCHECK(n == num_alloc_buckets_, "");
+
+ if (mmap_table_ != NULL) {
+ for (int b = 0; b < kHashTableSize; b++) {
+ for (Bucket* x = mmap_table_[b]; x != 0; x = x->next) {
+ list[n++] = x;
+ }
+ }
+ }
+ RAW_DCHECK(n == num_alloc_buckets_ + num_available_mmap_buckets_, "");
- sort(list, list + num_buckets_, ByAllocatedSpace);
+ sort(list, list + num_alloc_buckets_ + num_available_mmap_buckets_,
+ ByAllocatedSpace);
return list;
}
+void HeapProfileTable::RefreshMMapData() {
+ // Make the table
+ static const int mmap_table_bytes = kHashTableSize * sizeof(*mmap_table_);
+ if (mmap_table_ == NULL) {
+ mmap_table_ = reinterpret_cast<Bucket**>(alloc_(mmap_table_bytes));
+ memset(mmap_table_, 0, mmap_table_bytes);
+ }
+ num_available_mmap_buckets_ = 0;
+
+ ClearMMapData();
+ mmap_address_map_ =
+ new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
+
+ MemoryRegionMap::LockHolder l;
+ for (MemoryRegionMap::RegionIterator r =
+ MemoryRegionMap::BeginRegionLocked();
+ r != MemoryRegionMap::EndRegionLocked(); ++r) {
+ Bucket* b =
+ GetBucket(r->call_stack_depth, r->call_stack, mmap_table_, NULL);
+ if (b->alloc_size == 0) {
+ num_available_mmap_buckets_ += 1;
+ }
+ b->allocs += 1;
+ b->alloc_size += r->end_addr - r->start_addr;
+
+ AllocValue v;
+ v.set_bucket(b);
+ v.bytes = r->end_addr - r->start_addr;
+ mmap_address_map_->Insert(reinterpret_cast<const void*>(r->start_addr), v);
+ }
+}
+
+void HeapProfileTable::ClearMMapData() {
+ if (mmap_address_map_ != NULL) {
+ mmap_address_map_->Iterate(ZeroBucketCountsIterator, this);
+ mmap_address_map_->~AllocationMap();
+ dealloc_(mmap_address_map_);
+ mmap_address_map_ = NULL;
+ }
+}
+
void HeapProfileTable::IterateOrderedAllocContexts(
AllocContextIterator callback) const {
Bucket** list = MakeSortedBucketList();
AllocContextInfo info;
- for (int i = 0; i < num_buckets_; ++i) {
+ for (int i = 0; i < num_alloc_buckets_; ++i) {
*static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]);
info.stack_depth = list[i]->depth;
info.call_stack = list[i]->stack;
@@ -351,9 +432,14 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
memset(&stats, 0, sizeof(stats));
int bucket_length = snprintf(buf, size, "%s", kProfileHeader);
if (bucket_length < 0 || bucket_length >= size) return 0;
- bucket_length = UnparseBucket(total_, buf, bucket_length, size,
+ Bucket total_with_mmap(total_);
+ if (mmap_table_ != NULL) {
+ total_with_mmap.alloc_size += MemoryRegionMap::MapSize();
+ total_with_mmap.free_size += MemoryRegionMap::UnmapSize();
+ }
+ bucket_length = UnparseBucket(total_with_mmap, buf, bucket_length, size,
" heapprofile", &stats);
- for (int i = 0; i < num_buckets_; i++) {
+ for (int i = 0; i < num_alloc_buckets_; i++) {
bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "",
&stats);
}
@@ -388,6 +474,17 @@ void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v,
RawWrite(args.fd, buf, len);
}
+inline void HeapProfileTable::ZeroBucketCountsIterator(
+ const void* ptr, AllocValue* v, HeapProfileTable* heap_profile) {
+ Bucket* b = v->bucket();
+ if (b != NULL) {
+ b->allocs = 0;
+ b->alloc_size = 0;
+ b->free_size = 0;
+ b->frees = 0;
+ }
+}
+
// Callback from NonLiveSnapshot; adds entry to arg->dest
// if the entry is not live and is not present in arg->base.
void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v,
@@ -452,7 +549,7 @@ void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() {
Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
- allocation_->Iterate(AddToSnapshot, s);
+ alloc_address_map_->Iterate(AddToSnapshot, s);
return s;
}
@@ -477,7 +574,7 @@ HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot(
AddNonLiveArgs args;
args.dest = s;
args.base = base;
- allocation_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
+ alloc_address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n",
int(s->total_.allocs - s->total_.frees),
int(s->total_.alloc_size - s->total_.free_size));
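
For readers following the GetBucket() generalization above: both tables use
the same hand-rolled chained hash keyed on the raw stack trace. A
stripped-down sketch of the lookup both call sites now share; the hash mixing
shown here is illustrative, not the exact function elided from the hunk:

    uintptr_t h = 0;
    for (int i = 0; i < depth; i++)
      h = h * 31 + reinterpret_cast<uintptr_t>(key[i]);   // illustrative mix
    unsigned int buck = ((unsigned int) h) % kHashTableSize;
    for (Bucket* b = table[buck]; b != NULL; b = b->next)
      if (b->hash == h && b->depth == depth &&
          std::equal(key, key + depth, b->stack))
        return b;            // hit; on a miss, insert at the chain head
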
diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h
index 8ab1164..abd3184 100644
--- a/src/heap-profile-table.h
+++ b/src/heap-profile-table.h
@@ -138,7 +138,8 @@ class HeapProfileTable {
// are skipped in heap checking reports.
void MarkAsIgnored(const void* ptr);
- // Return current total (de)allocation statistics.
+ // Return current total (de)allocation statistics. These stats do not
+ // include mmap'ed regions.
const Stats& total() const { return total_; }
// Allocation data iteration callback: gets passed object pointer and
@@ -148,7 +149,7 @@ class HeapProfileTable {
// Iterate over the allocation profile data calling "callback"
// for every allocation.
void IterateAllocs(AllocIterator callback) const {
- allocation_->Iterate(MapArgsAllocIterator, callback);
+ alloc_address_map_->Iterate(MapArgsAllocIterator, callback);
}
// Allocation context profile data iteration callback
@@ -186,6 +187,16 @@ class HeapProfileTable {
// Caller must call ReleaseSnapshot() on result when no longer needed.
Snapshot* NonLiveSnapshot(Snapshot* base);
+ // Refresh the internal mmap information from MemoryRegionMap. Results of
+ // FillOrderedProfile and IterateOrderedAllocContexts will include the
+ // mmap'ed memory regions as of the moment RefreshMMapData was called.
+ void RefreshMMapData();
+
+ // Clear the internal mmap information. Results of FillOrderedProfile and
+ // IterateOrderedAllocContexts won't contain mmap'ed memory regions after
+ // calling ClearMMapData.
+ void ClearMMapData();
+
private:
// data types ----------------------------
@@ -263,9 +274,18 @@ class HeapProfileTable {
const char* extra,
Stats* profile_stats);
- // Get the bucket for the caller stack trace 'key' of depth 'depth'
- // creating the bucket if needed.
- Bucket* GetBucket(int depth, const void* const key[]);
+ // Deallocate a given allocation map.
+ void DeallocateAllocationMap(AllocationMap* allocation);
+
+ // Deallocate a given bucket table.
+ void DeallocateBucketTable(Bucket** table);
+
+ // Get the bucket for the caller stack trace 'key' of depth 'depth' from a
+ // bucket hash map 'table', creating the bucket if needed. When a new
+ // bucket object is created and 'bucket_count' is not NULL,
+ // '*bucket_count' is incremented.
+ Bucket* GetBucket(int depth, const void* const key[], Bucket** table,
+ int* bucket_count);
// Helper for IterateAllocs to do callback signature conversion
// from AllocationMap::Iterate to AllocIterator.
@@ -285,9 +305,14 @@ class HeapProfileTable {
inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v,
const DumpArgs& args);
+ // Helper for zeroing the size fields in buckets.
+ inline static void ZeroBucketCountsIterator(
+ const void* ptr, AllocValue* v, HeapProfileTable* heap_profile);
+
// Helper for IterateOrderedAllocContexts and FillOrderedProfile.
- // Creates a sorted list of Buckets whose length is num_buckets_.
- // The caller is responsible for dellocating the returned list.
+ // Creates a sorted list of Buckets whose length is num_alloc_buckets_ +
+ // num_available_mmap_buckets_.
+ // The caller is responsible for deallocating the returned list.
Bucket** MakeSortedBucketList() const;
// Helper for TakeSnapshot. Saves object to snapshot.
@@ -319,17 +344,25 @@ class HeapProfileTable {
// Overall profile stats; we use only the Stats part,
// but make it a Bucket to pass to UnparseBucket.
+ // It doesn't contain mmap'ed regions.
Bucket total_;
- // Bucket hash table.
+ // Bucket hash table for malloc.
// We hand-craft one instead of using one of the pre-written
// ones because we do not want to use malloc when operating on the table.
// It is only a few lines of code, so no big deal.
- Bucket** table_;
- int num_buckets_;
-
- // Map of all currently allocated objects we know about.
- AllocationMap* allocation_;
+ Bucket** alloc_table_;
+ int num_alloc_buckets_;
+
+ // Bucket hash table for mmap.
+ // This table is filled with the information from MemoryRegionMap by calling
+ // RefreshMMapData.
+ Bucket** mmap_table_;
+ int num_available_mmap_buckets_;
+
+ // Map of all currently allocated objects and mapped regions we know about.
+ AllocationMap* alloc_address_map_;
+ AllocationMap* mmap_address_map_;
DISALLOW_COPY_AND_ASSIGN(HeapProfileTable);
};
diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc
index 32815b4..68b379d 100644
--- a/src/heap-profiler.cc
+++ b/src/heap-profiler.cc
@@ -175,29 +175,6 @@ static HeapProfileTable* heap_profile = NULL; // the heap profile table
// Profile generation
//----------------------------------------------------------------------
-enum AddOrRemove { ADD, REMOVE };
-
-// Add or remove all MMap-allocated regions to/from *heap_profile.
-// Assumes heap_lock is held.
-static void AddRemoveMMapDataLocked(AddOrRemove mode) {
- RAW_DCHECK(heap_lock.IsHeld(), "");
- if (!FLAGS_mmap_profile || !is_on) return;
- // MemoryRegionMap maintained all the data we need for all
- // mmap-like allocations, so we just use it here:
- MemoryRegionMap::LockHolder l;
- for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked();
- r != MemoryRegionMap::EndRegionLocked(); ++r) {
- if (mode == ADD) {
- heap_profile->RecordAlloc(
- reinterpret_cast<const void*>(r->start_addr),
- r->end_addr - r->start_addr,
- r->call_stack_depth, r->call_stack);
- } else {
- heap_profile->RecordFree(reinterpret_cast<void*>(r->start_addr));
- }
- }
-}
-
// Input must be a buffer of size at least 1MB.
static char* DoGetHeapProfileLocked(char* buf, int buflen) {
// We used to be smarter about estimating the required memory and
@@ -208,16 +185,13 @@ static char* DoGetHeapProfileLocked(char* buf, int buflen) {
RAW_DCHECK(heap_lock.IsHeld(), "");
int bytes_written = 0;
if (is_on) {
- HeapProfileTable::Stats const stats = heap_profile->total();
- (void)stats; // avoid an unused-variable warning in non-debug mode.
- AddRemoveMMapDataLocked(ADD);
+ if (FLAGS_mmap_profile) {
+ heap_profile->RefreshMMapData();
+ }
bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1);
- // FillOrderedProfile should not reduce the set of active mmap-ed regions,
- // hence MemoryRegionMap will let us remove everything we've added above:
- AddRemoveMMapDataLocked(REMOVE);
- RAW_DCHECK(stats.Equivalent(heap_profile->total()), "");
- // if this fails, we somehow removed by AddRemoveMMapDataLocked
- // more than we have added.
+ if (FLAGS_mmap_profile) {
+ heap_profile->ClearMMapData();
+ }
}
buf[bytes_written] = '\0';
RAW_DCHECK(bytes_written == strlen(buf), "");
diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc
index bf946e6..deed4cd 100644
--- a/src/malloc_extension.cc
+++ b/src/malloc_extension.cc
@@ -108,9 +108,9 @@ SysAllocator::~SysAllocator() {}
// Default implementation -- does nothing
MallocExtension::~MallocExtension() { }
bool MallocExtension::VerifyAllMemory() { return true; }
-bool MallocExtension::VerifyNewMemory(void* p) { return true; }
-bool MallocExtension::VerifyArrayNewMemory(void* p) { return true; }
-bool MallocExtension::VerifyMallocMemory(void* p) { return true; }
+bool MallocExtension::VerifyNewMemory(const void* p) { return true; }
+bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; }
+bool MallocExtension::VerifyMallocMemory(const void* p) { return true; }
bool MallocExtension::GetNumericProperty(const char* property, size_t* value) {
return false;
@@ -177,7 +177,7 @@ size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) {
return size;
}
-size_t MallocExtension::GetAllocatedSize(void* p) {
+size_t MallocExtension::GetAllocatedSize(const void* p) {
assert(GetOwnership(p) != kNotOwned);
return 0;
}
@@ -343,9 +343,9 @@ void MallocExtension::Ranges(void* arg, RangeFunction func) {
}
C_SHIM(VerifyAllMemory, int, (void), ());
-C_SHIM(VerifyNewMemory, int, (void* p), (p));
-C_SHIM(VerifyArrayNewMemory, int, (void* p), (p));
-C_SHIM(VerifyMallocMemory, int, (void* p), (p));
+C_SHIM(VerifyNewMemory, int, (const void* p), (p));
+C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p));
+C_SHIM(VerifyMallocMemory, int, (const void* p), (p));
C_SHIM(MallocMemoryStats, int,
(int* blocks, size_t* total, int histogram[kMallocHistogramSize]),
(blocks, total, histogram));
@@ -362,7 +362,7 @@ C_SHIM(MarkThreadBusy, void, (void), ());
C_SHIM(ReleaseFreeMemory, void, (void), ());
C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes));
C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size));
-C_SHIM(GetAllocatedSize, size_t, (void* p), (p));
+C_SHIM(GetAllocatedSize, size_t, (const void* p), (p));
// Can't use the shim here because of the need to translate the enums.
extern "C"
diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc
index 3bf8983..e281706 100644
--- a/src/memory_region_map.cc
+++ b/src/memory_region_map.cc
@@ -145,6 +145,8 @@ SpinLock MemoryRegionMap::owner_lock_( // ACQUIRED_AFTER(lock_)
SpinLock::LINKER_INITIALIZED);
int MemoryRegionMap::recursion_count_ = 0; // GUARDED_BY(owner_lock_)
pthread_t MemoryRegionMap::lock_owner_tid_; // GUARDED_BY(owner_lock_)
+int64 MemoryRegionMap::map_size_ = 0;
+int64 MemoryRegionMap::unmap_size_ = 0;
// ========================================================================= //
@@ -462,6 +464,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
reinterpret_cast<void*>(region.caller()));
// Note: none of the above allocates memory.
Lock(); // recursively lock
+ map_size_ += size;
InsertRegionLocked(region);
// This will (eventually) allocate storage for and copy over the stack data
// from region.call_stack_data_ that is pointed by region.call_stack().
@@ -573,6 +576,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
reinterpret_cast<void*>(end_addr),
regions_->size());
if (VLOG_IS_ON(12)) LogAllLocked();
+ unmap_size_ += size;
Unlock();
}
diff --git a/src/memory_region_map.h b/src/memory_region_map.h
index 739514c..988ea70 100644
--- a/src/memory_region_map.h
+++ b/src/memory_region_map.h
@@ -252,6 +252,10 @@ class MemoryRegionMap {
static RegionIterator BeginRegionLocked();
static RegionIterator EndRegionLocked();
+ // Return the accumulated sizes of mapped and unmapped regions.
+ static int64 MapSize() { return map_size_; }
+ static int64 UnmapSize() { return unmap_size_; }
+
// Effectively private type from our .cc =================================
// public to let us declare global objects:
union RegionSetRep;
@@ -286,6 +290,11 @@ class MemoryRegionMap {
// The thread id of the thread that's inside the recursive lock.
static pthread_t lock_owner_tid_;
+ // Total size of all mapped pages so far
+ static int64 map_size_;
+ // Total size of all unmapped pages so far
+ static int64 unmap_size_;
+
// helpers ==================================================================
// Helper for FindRegion and FindAndMarkStackRegion:
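
Taken together with the FillOrderedProfile change earlier in this patch, the
two new counters supply the profile's mmap totals: MapSize() feeds
alloc_size and UnmapSize() feeds free_size, so the net live mapped footprint
falls out as their difference. A usage sketch:

    // Net bytes currently mapped through MemoryRegionMap's hooks.
    int64 live_mmap_bytes =
        MemoryRegionMap::MapSize() - MemoryRegionMap::UnmapSize();
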
diff --git a/src/system-alloc.cc b/src/system-alloc.cc
index 6379494..cf35446 100644
--- a/src/system-alloc.cc
+++ b/src/system-alloc.cc
@@ -61,6 +61,13 @@
# define MAP_ANONYMOUS MAP_ANON
#endif
+// MADV_FREE is specifically designed for use by malloc(), but only
+// FreeBSD supports it; on Linux we fall back to the somewhat inferior
+// MADV_DONTNEED.
+#ifndef MADV_FREE
+# define MADV_FREE MADV_DONTNEED
+#endif
+
// Solaris has a bug where it doesn't declare madvise() for C++.
// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
#if defined(__sun) && defined(__SVR4)
@@ -107,7 +114,7 @@ union MemoryAligner {
static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
-#if defined(HAVE_MMAP) || defined(MADV_DONTNEED)
+#if defined(HAVE_MMAP) || defined(MADV_FREE)
// Page size is initialized on demand (only needed for mmap-based allocators)
static size_t pagesize = 0;
#endif
@@ -424,7 +431,6 @@ void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size,
if (result != NULL) {
return result;
}
- Log(kLog, __FILE__, __LINE__, names_[i], "failed");
failed_[i] = true;
}
}
@@ -488,10 +494,10 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
}
void TCMalloc_SystemRelease(void* start, size_t length) {
-#ifdef MADV_DONTNEED
+#ifdef MADV_FREE
if (FLAGS_malloc_devmem_start) {
- // It's not safe to use MADV_DONTNEED if we've been mapping
- // /dev/mem for heap memory
+ // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
+ // mapping /dev/mem for heap memory.
return;
}
if (pagesize == 0) pagesize = getpagesize();
@@ -515,7 +521,7 @@ void TCMalloc_SystemRelease(void* start, size_t length) {
// Note -- ignoring most return codes, because if this fails it
// doesn't matter...
while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start,
- MADV_DONTNEED) == -1 &&
+ MADV_FREE) == -1 &&
errno == EAGAIN) {
// NOP
}
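
A self-contained sketch of the release idiom this file now uses, mirroring
the core loop of TCMalloc_SystemRelease with the same fallback define (start
and length are assumed page-aligned, as the caller arranges):

    #include <errno.h>
    #include <sys/mman.h>
    #ifndef MADV_FREE
    # define MADV_FREE MADV_DONTNEED   // Linux fallback, as above
    #endif
    static void ReleaseRange(char* start, size_t length) {
      // EAGAIN is transient; other failures are deliberately ignored.
      while (madvise(start, length, MADV_FREE) == -1 && errno == EAGAIN) {
      }
    }
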
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index cfce3a7..a83e7ea 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -289,13 +289,13 @@ void InvalidFree(void* ptr) {
Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr);
}
-size_t InvalidGetSizeForRealloc(void* old_ptr) {
+size_t InvalidGetSizeForRealloc(const void* old_ptr) {
Log(kCrash, __FILE__, __LINE__,
"Attempt to realloc invalid pointer", old_ptr);
return 0;
}
-size_t InvalidGetAllocatedSize(void* ptr) {
+size_t InvalidGetAllocatedSize(const void* ptr) {
Log(kCrash, __FILE__, __LINE__,
"Attempt to get the size of an invalid pointer", ptr);
return 0;
@@ -757,7 +757,7 @@ class TCMallocImplementation : public MallocExtension {
// This just calls GetSizeWithCallback, but because that's in an
// unnamed namespace, we need to move the definition below it in the
// file.
- virtual size_t GetAllocatedSize(void* ptr);
+ virtual size_t GetAllocatedSize(const void* ptr);
// This duplicates some of the logic in GetSizeWithCallback, but is
// faster. This is important on OS X, where this function is called
@@ -1147,8 +1147,8 @@ inline void do_free(void* ptr) {
// NOTE: some logic here is duplicated in GetOwnership (above), for
// speed. If you change this function, look at that one too.
-inline size_t GetSizeWithCallback(void* ptr,
- size_t (*invalid_getsize_fn)(void*)) {
+inline size_t GetSizeWithCallback(const void* ptr,
+ size_t (*invalid_getsize_fn)(const void*)) {
if (ptr == NULL)
return 0;
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
@@ -1173,7 +1173,7 @@ inline size_t GetSizeWithCallback(void* ptr,
inline void* do_realloc_with_callback(
void* old_ptr, size_t new_size,
void (*invalid_free_fn)(void*),
- size_t (*invalid_get_size_fn)(void*)) {
+ size_t (*invalid_get_size_fn)(const void*)) {
// Get the size of the old entry
const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
@@ -1438,7 +1438,7 @@ void* cpp_memalign(size_t align, size_t size) {
} // end unnamed namespace
// As promised, the definition of this function, declared above.
-size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
+size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
ASSERT(TCMallocImplementation::GetOwnership(ptr)
!= TCMallocImplementation::kNotOwned);
return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
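
With the const propagation complete down through GetSizeWithCallback, callers
can now query sizes through a const pointer without a cast. A small usage
sketch against the public extension interface:

    #include <stdlib.h>
    #include <google/malloc_extension.h>

    const void* p = malloc(100);
    // Compiles now that the signature takes const void*; returns the
    // usable size tcmalloc actually reserved for p.
    size_t n = MallocExtension::instance()->GetAllocatedSize(p);
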
diff --git a/src/tests/sampler_test.cc b/src/tests/sampler_test.cc
index 31c87cd..c55d5dc 100755
--- a/src/tests/sampler_test.cc
+++ b/src/tests/sampler_test.cc
@@ -357,7 +357,7 @@ bool CheckMean(size_t mean, int num_samples) {
}
double empirical_mean = total / static_cast<double>(num_samples);
double expected_sd = mean / pow(num_samples * 1.0, 0.5);
- return(abs(mean-empirical_mean) < expected_sd * kSigmas);
+ return(fabs(mean-empirical_mean) < expected_sd * kSigmas);
}
// Prints a sequence so you can look at the distribution
@@ -409,8 +409,8 @@ TEST(Sampler, LargeAndSmallAllocs_CombinedTest) {
size_small, kSamplingInterval);
LOG(INFO) << StringPrintf("large_allocs_sds = %f\n", large_allocs_sds);
LOG(INFO) << StringPrintf("small_allocs_sds = %f\n", small_allocs_sds);
- CHECK_LE(abs(large_allocs_sds), kSigmas);
- CHECK_LE(abs(small_allocs_sds), kSigmas);
+ CHECK_LE(fabs(large_allocs_sds), kSigmas);
+ CHECK_LE(fabs(small_allocs_sds), kSigmas);
}
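
The abs-to-fabs fixes matter because the arguments are doubles: unqualified
abs() resolves to C's int abs(int), silently truncating the value before
taking the magnitude. A two-line illustration of the trap:

    double diff = -0.75;
    int bad   = abs(diff);    // integer abs: converts to 0, magnitude lost
    double ok = fabs(diff);   // 0.75, as the checks above intend
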
// Tests whether the mean is about right over 1000 samples
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index 39f2424..d6dead3 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -86,7 +86,7 @@ bool kernel_supports_tls = false; // be conservative
# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
void CheckIfKernelSupportsTLS() {
struct utsname buf;
- if (uname(&buf) != 0) { // should be impossible
+ if (uname(&buf) < 0) { // should be impossible
Log(kLog, __FILE__, __LINE__,
"uname failed assuming no TLS support (errno)", errno);
kernel_supports_tls = false;
diff --git a/src/windows/ia32_opcode_map.cc b/src/windows/ia32_opcode_map.cc
index c9ec18b..ba6a79e 100644
--- a/src/windows/ia32_opcode_map.cc
+++ b/src/windows/ia32_opcode_map.cc
@@ -111,6 +111,25 @@ const Opcode s_first_opcode_byte[] = {
/* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#ifdef _M_X64
+ /* REX Prefixes in 64-bit mode. */
+ /* 0x40 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x41 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x42 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x43 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x44 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x45 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x46 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x47 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x48 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x49 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4A */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4B */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4C */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4D */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0x4F */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#else
/* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -127,6 +146,7 @@ const Opcode s_first_opcode_byte[] = {
/* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#endif
/* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -231,6 +251,16 @@ const Opcode s_first_opcode_byte[] = {
/* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#ifdef _M_X64
+ /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#else
/* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -239,6 +269,7 @@ const Opcode s_first_opcode_byte[] = {
/* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#endif
/* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
/* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
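
Background for the two #ifdef _M_X64 blocks above: in 64-bit mode the
one-byte inc/dec encodings 0x40-0x4F are repurposed as REX prefixes
(0100WRXB), and with REX.W set, the mov-immediate opcodes 0xB8-0xBF take a
full 8-byte immediate, hence the IOS_64 marking. A worked example: the ten
bytes

    0x48, 0xB8, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11

decode as mov rax, 0x1122334455667788 (REX.W prefix, opcode 0xB8,
little-endian imm64).
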
diff --git a/src/windows/mini_disassembler.cc b/src/windows/mini_disassembler.cc
index 30bdcc1..9e336ba 100644
--- a/src/windows/mini_disassembler.cc
+++ b/src/windows/mini_disassembler.cc
@@ -100,6 +100,12 @@ InstructionType MiniDisassembler::Disassemble(
void MiniDisassembler::Initialize() {
operand_is_32_bits_ = operand_default_is_32_bits_;
address_is_32_bits_ = address_default_is_32_bits_;
+#ifdef _M_X64
+ operand_default_support_64_bits_ = true;
+#else
+ operand_default_support_64_bits_ = false;
+#endif
+ operand_is_64_bits_ = false;
operand_bytes_ = 0;
have_modrm_ = false;
should_decode_modrm_ = false;
@@ -129,6 +135,8 @@ InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
got_f3_prefix_ = true;
else if (0x66 == (*start_byte))
got_66_prefix_ = true;
+ else if (operand_default_support_64_bits_ && ((*start_byte) & 0x48) == 0x48)
+ operand_is_64_bits_ = true;
instruction_type = opcode.type_;
size ++;
@@ -314,8 +322,12 @@ bool MiniDisassembler::ProcessOperand(int flag_operand) {
// floating point
succeeded = false;
break;
- case OT_V: // Word or doubleword, depending on operand-size attribute.
- if (operand_is_32_bits_)
+ case OT_V: // Word, doubleword or quadword, depending on operand-size
+ // attribute.
+ if (operand_is_64_bits_ && flag_operand & AM_I &&
+ flag_operand & IOS_64)
+ operand_bytes_ += OS_QUAD_WORD;
+ else if (operand_is_32_bits_)
operand_bytes_ += OS_DOUBLE_WORD;
else
operand_bytes_ += OS_WORD;
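
One subtlety worth keeping in mind when reading ProcessPrefixes: REX is a
family of sixteen prefixes, 0100WRXB, and only the W bit widens the operand.
A hedged sketch of strict REX.W detection (byte is the candidate prefix):

    bool is_rex = (byte & 0xF0) == 0x40;        // any of 0x40..0x4F
    bool rex_w  = is_rex && (byte & 0x08) != 0; // W bit => 64-bit operand
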
diff --git a/src/windows/mini_disassembler.h b/src/windows/mini_disassembler.h
index e676232..59a5d08 100644
--- a/src/windows/mini_disassembler.h
+++ b/src/windows/mini_disassembler.h
@@ -165,6 +165,12 @@ class MiniDisassembler {
// Default address size is 32 bits if true, 16 bits if false.
bool address_default_is_32_bits_;
+ // Determines if 64 bit operands are supported (x64).
+ bool operand_default_support_64_bits_;
+
+ // Current operand size is 64 bits if true, 32 bits if false.
+ bool operand_is_64_bits_;
+
// Huge big opcode table based on the IA-32 manual, defined
// in Ia32OpcodeMap.cc
static const OpcodeTable s_ia32_opcode_map_[];
diff --git a/src/windows/mini_disassembler_types.h b/src/windows/mini_disassembler_types.h
index 7f8e997..83dee8b 100644
--- a/src/windows/mini_disassembler_types.h
+++ b/src/windows/mini_disassembler_types.h
@@ -143,6 +143,16 @@ enum OperandType {
OT_ADDRESS_MODE_M = 0x80000000
};
+// Flag that indicates whether an immediate operand is 64 bits wide.
+//
+// The Intel 64 and IA-32 Architecture Software Developer's Manual currently
+// defines MOV as the only instruction supporting a 64-bit immediate operand.
+enum ImmediateOperandSize {
+ IOS_MASK = 0x0000F000,
+ IOS_DEFAULT = 0x0,
+ IOS_64 = 0x00001000
+};
+
// Everything that's in an Opcode (see below) except the three
// alternative opcode structs for different prefixes.
struct SpecificOpcode {
@@ -154,8 +164,8 @@ struct SpecificOpcode {
InstructionType type_;
// Description of the type of the dest, src and aux operands,
- // put together from an enOperandType flag and an enAddressingMethod
- // flag.
+ // put together from enOperandType, enAddressingMethod and
+ // enImmediateOperandSize flags.
int flag_dest_;
int flag_source_;
int flag_aux_;
diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc
index f837e7a..3ff6eb5 100644
--- a/src/windows/patch_functions.cc
+++ b/src/windows/patch_functions.cc
@@ -181,6 +181,8 @@ class LibcInfo {
kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow,
// These are windows-only functions from malloc.h
k_Msize, k_Expand,
+ // An MS CRT "internal" function, implemented using _calloc_impl
+ k_CallocCrt,
kNumFunctions
};
@@ -404,7 +406,7 @@ const char* const LibcInfo::function_name_[] = {
NULL, // kMangledNewArrayNothrow,
NULL, // kMangledDeleteNothrow,
NULL, // kMangledDeleteArrayNothrow,
- "_msize", "_expand",
+ "_msize", "_expand", "_calloc_crt",
};
// For mingw, I can't patch the new/delete here, because the
@@ -435,6 +437,7 @@ const GenericFnPtr LibcInfo::static_fn_[] = {
#endif
(GenericFnPtr)&::_msize,
(GenericFnPtr)&::_expand,
+ (GenericFnPtr)&::calloc,
};
template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = {
@@ -457,6 +460,7 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
(GenericFnPtr)&Perftools_deletearray_nothrow,
(GenericFnPtr)&Perftools__msize,
(GenericFnPtr)&Perftools__expand,
+ (GenericFnPtr)&Perftools_calloc,
};
/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = {
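
The _calloc_crt hook touches three parallel tables that are all indexed by
the same enum, so the new entry must appear at the same position in each: the
probe name, the default original-function pointer, and the perftools
replacement. A schematic of the invariant, abbreviated from the arrays above:

    // function_name_[k_CallocCrt] == "_calloc_crt"
    // static_fn_[k_CallocCrt]     == (GenericFnPtr)&::calloc
    // perftools_fn_[k_CallocCrt]  == (GenericFnPtr)&Perftools_calloc
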
diff --git a/src/windows/preamble_patcher.cc b/src/windows/preamble_patcher.cc
index 78a4763..b27a95b 100644
--- a/src/windows/preamble_patcher.cc
+++ b/src/windows/preamble_patcher.cc
@@ -29,6 +29,7 @@
*
* ---
* Author: Joi Sigurdsson
+ * Author: Scott Francis
*
* Implementation of PreamblePatcher
*/
@@ -46,18 +47,42 @@
#define ASM_JMP32ABS_0 0xFF
#define ASM_JMP32ABS_1 0x25
#define ASM_JMP8REL 0xEB
+#define ASM_JCC32REL_0 0x0F
+#define ASM_JCC32REL_1_MASK 0x80
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_REXW 0x48
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
namespace sidestep {
+PreamblePatcher::PreamblePage* PreamblePatcher::preamble_pages_ = NULL;
+long PreamblePatcher::granularity_ = 0;
+long PreamblePatcher::pagesize_ = 0;
+bool PreamblePatcher::initialized_ = false;
+
+static const unsigned int kPreamblePageMagic = 0x4347414D; // "MAGC"
+
// Handle a special case that we see with functions that point into an
// IAT table (including functions linked statically into the
// application): these functions already start with ASM_JMP32*. For
// instance, malloc() might be implemented as a JMP to __malloc().
// This function follows the initial JMPs for us, until we get to the
// place where the actual code is defined. If we get to STOP_BEFORE,
-// we return the address before stop_before.
+// we return the address before stop_before. The stop_before_trampoline
+// flag is used in 64-bit mode. If true, we will return the address
+// before a trampoline is detected. Trampolines are defined as:
+//
+// nop
+// mov rax, <replacement_function>
+// jmp rax
+//
+// See PreamblePatcher::RawPatchWithStub for more information.
void* PreamblePatcher::ResolveTargetImpl(unsigned char* target,
- unsigned char* stop_before) {
+ unsigned char* stop_before,
+ bool stop_before_trampoline) {
if (target == NULL)
return NULL;
while (1) {
@@ -81,15 +106,26 @@ void* PreamblePatcher::ResolveTargetImpl(unsigned char* target,
// Visual studio seems to sometimes do it this way instead of the
// previous way. Not sure what the rules are, but it was happening
// with operator new in some binaries.
- void **new_target_v;
- SIDESTEP_ASSERT(sizeof(new_target) == 4);
- memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+ void** new_target_v;
+ if (kIs64BitBinary) {
+ // In 64-bit mode JMPs are RIP-relative, not absolute
+ int target_offset;
+ memcpy(reinterpret_cast<void*>(&target_offset),
+ reinterpret_cast<void*>(target + 2), 4);
+ new_target_v = reinterpret_cast<void**>(target + target_offset + 6);
+ } else {
+ SIDESTEP_ASSERT(sizeof(new_target) == 4);
+ memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+ }
new_target = reinterpret_cast<unsigned char*>(*new_target_v);
} else {
break;
}
if (new_target == stop_before)
break;
+ if (stop_before_trampoline && *new_target == ASM_NOP
+ && new_target[1] == ASM_REXW && new_target[2] == ASM_MOVRAX_IMM)
+ break;
target = new_target;
}
return target;
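
Two encodings are load-bearing in the hunk above. The RIP-relative jump it
now decodes is the 6-byte form FF 25 rel32, so the absolute slot address is
target + rel32 + 6. And the 64-bit trampoline it learns to stop before is a
13-byte sequence which, in terms of the opcode defines at the top of this
file, looks like:

    unsigned char trampoline[13] = {
      ASM_NOP,                     // 90
      ASM_REXW, ASM_MOVRAX_IMM,    // 48 B8: mov rax, imm64
      0, 0, 0, 0, 0, 0, 0, 0,      // 8-byte replacement-function address
      ASM_JMP, ASM_JMP_RAX         // FF E0: jmp rax
    };
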
@@ -103,7 +139,7 @@ class DeleteUnsignedCharArray {
~DeleteUnsignedCharArray() {
if (array_) {
- delete [] array_;
+ PreamblePatcher::FreePreambleBlock(array_);
}
}
@@ -191,9 +227,23 @@ SideStepError PreamblePatcher::RawPatch(void* target_function,
return SIDESTEP_INVALID_PARAMETER;
}
- // @see MAX_PREAMBLE_STUB_SIZE for an explanation of how we arrives at
- // this size
- unsigned char* preamble_stub = new unsigned char[MAX_PREAMBLE_STUB_SIZE];
+ BOOL succeeded = FALSE;
+
+ // First, deal with a special case that we see with functions that
+ // point into an IAT table (including functions linked statically
+ // into the application): these functions already start with
+ // ASM_JMP32REL. For instance, malloc() might be implemented as a
+ // JMP to __malloc(). In that case, we replace the destination of
+ // the JMP (__malloc), rather than the JMP itself (malloc). This
+ // way we get the correct behavior no matter how malloc gets called.
+ void* new_target = ResolveTarget(target_function);
+ if (new_target != target_function) {
+ target_function = new_target;
+ }
+
+ // In 64-bit mode, preamble_stub must be within 2GB of target function
+ // so that if target contains a jump, we can translate it.
+ unsigned char* preamble_stub = AllocPreambleBlockNear(target_function);
if (!preamble_stub) {
SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub.");
return SIDESTEP_INSUFFICIENT_BUFFER;
@@ -202,19 +252,6 @@ SideStepError PreamblePatcher::RawPatch(void* target_function,
// Frees the array at end of scope.
DeleteUnsignedCharArray guard_preamble_stub(preamble_stub);
- // Change the protection of the newly allocated preamble stub to
- // PAGE_EXECUTE_READWRITE. This is required to work with DEP (Data
- // Execution Prevention) which will cause an exception if code is executed
- // from a page on which you do not have read access.
- DWORD old_stub_protect = 0;
- BOOL succeeded = ::VirtualProtect(preamble_stub, MAX_PREAMBLE_STUB_SIZE,
- PAGE_EXECUTE_READWRITE, &old_stub_protect);
- if (!succeeded) {
- SIDESTEP_ASSERT(false &&
- "Failed to make page preamble stub read-write-execute.");
- return SIDESTEP_ACCESS_DENIED;
- }
-
SideStepError error_code = RawPatchWithStubAndProtections(
target_function, replacement_function, preamble_stub,
MAX_PREAMBLE_STUB_SIZE, NULL);
@@ -260,23 +297,6 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
return SIDESTEP_INVALID_PARAMETER;
}
- // We disassemble the preamble of the _stub_ to see how many bytes we
- // originally copied to the stub.
- MiniDisassembler disassembler;
- unsigned int preamble_bytes = 0;
- while (preamble_bytes < 5) {
- InstructionType instruction_type =
- disassembler.Disassemble(
- reinterpret_cast<unsigned char*>(original_function_stub) +
- preamble_bytes,
- preamble_bytes);
- if (IT_GENERIC != instruction_type) {
- SIDESTEP_ASSERT(false &&
- "Should only have generic instructions in stub!!");
- return SIDESTEP_UNSUPPORTED_INSTRUCTION;
- }
- }
-
// Before unpatching, target_function should be a JMP to
// replacement_function. If it's not, then either it's an error, or
// we're falling into the case where the original instruction was a
@@ -286,7 +306,8 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
target = reinterpret_cast<unsigned char*>(
ResolveTargetImpl(
- target, reinterpret_cast<unsigned char*>(replacement_function)));
+ target, reinterpret_cast<unsigned char*>(replacement_function),
+ true));
// We should end at the function we patched. When we patch, we insert
// a ASM_JMP32REL instruction, so look for that as a sanity check.
if (target[0] != ASM_JMP32REL) {
@@ -295,11 +316,13 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
return SIDESTEP_INVALID_PARAMETER;
}
+ const unsigned int kRequiredTargetPatchBytes = 5;
+
// We need to be able to write to a process-local copy of the first
- // MAX_PREAMBLE_STUB_SIZE bytes of target_function
+ // kRequiredTargetPatchBytes bytes of target_function
DWORD old_target_function_protect = 0;
- BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function),
- MAX_PREAMBLE_STUB_SIZE,
+ BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target),
+ kRequiredTargetPatchBytes,
PAGE_EXECUTE_READWRITE,
&old_target_function_protect);
if (!succeeded) {
@@ -308,20 +331,67 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
return SIDESTEP_ACCESS_DENIED;
}
- // Replace the first few bytes of the original function with the bytes we
- // previously moved to the preamble stub.
- memcpy(reinterpret_cast<void*>(target),
- original_function_stub, preamble_bytes);
+ unsigned char* preamble_stub = reinterpret_cast<unsigned char*>(
+ original_function_stub);
- // Stub is now useless so delete it.
- // [csilvers: Commented out for perftools because it causes big problems
- // when we're unpatching malloc. We just let this live on as a leak.]
- //delete [] reinterpret_cast<unsigned char*>(original_function_stub);
+ // Disassemble the preamble of the stub and copy the bytes back to target.
+ // If we converted any short conditional jumps to near jumps when creating
+ // the stub, we need to convert them back to the original REL8 jumps in
+ // the target.
+ MiniDisassembler disassembler;
+ unsigned int preamble_bytes = 0;
+ unsigned int target_bytes = 0;
+ while (target_bytes < kRequiredTargetPatchBytes) {
+ unsigned int cur_bytes = 0;
+ InstructionType instruction_type =
+ disassembler.Disassemble(preamble_stub + preamble_bytes, cur_bytes);
+ if (IT_JUMP == instruction_type) {
+ unsigned int jump_bytes = 0;
+ SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION;
+ if (IsNearConditionalJump(preamble_stub + preamble_bytes, cur_bytes) ||
+ IsNearRelativeJump(preamble_stub + preamble_bytes, cur_bytes) ||
+ IsNearAbsoluteCall(preamble_stub + preamble_bytes, cur_bytes) ||
+ IsNearRelativeCall(preamble_stub + preamble_bytes, cur_bytes)) {
+ jump_ret = PatchNearJumpOrCall(preamble_stub + preamble_bytes,
+ cur_bytes, target + target_bytes,
+ &jump_bytes, MAX_PREAMBLE_STUB_SIZE);
+ }
+ if (jump_ret == SIDESTEP_JUMP_INSTRUCTION) {
+ SIDESTEP_ASSERT(false &&
+ "Found unsupported jump instruction in stub!!");
+ return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+ }
+ target_bytes += jump_bytes;
+ } else if (IT_GENERIC == instruction_type) {
+ if (IsMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes)) {
+ unsigned int mov_bytes = 0;
+ if (PatchMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes,
+ target + target_bytes, &mov_bytes,
+ MAX_PREAMBLE_STUB_SIZE)
+ != SIDESTEP_SUCCESS) {
+ SIDESTEP_ASSERT(false &&
+ "Found unsupported generic instruction in stub!!");
+ return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+ }
+ } else {
+ memcpy(reinterpret_cast<void*>(target + target_bytes),
+ reinterpret_cast<void*>(reinterpret_cast<unsigned char*>(
+ original_function_stub) + preamble_bytes), cur_bytes);
+ target_bytes += cur_bytes;
+ }
+ } else {
+ SIDESTEP_ASSERT(false &&
+ "Found unsupported instruction in stub!!");
+ return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+ }
+ preamble_bytes += cur_bytes;
+ }
- // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of
+ FreePreambleBlock(reinterpret_cast<unsigned char*>(original_function_stub));
+
+ // Restore the protection of the first kRequiredTargetPatchBytes bytes of
// target to what they were before we started goofing around.
succeeded = ::VirtualProtect(reinterpret_cast<void*>(target),
- MAX_PREAMBLE_STUB_SIZE,
+ kRequiredTargetPatchBytes,
old_target_function_protect,
&old_target_function_protect);
@@ -341,4 +411,274 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
return SIDESTEP_SUCCESS;
}
+void PreamblePatcher::Initialize() {
+ if (!initialized_) {
+ SYSTEM_INFO si = { 0 };
+ ::GetSystemInfo(&si);
+ granularity_ = si.dwAllocationGranularity;
+ pagesize_ = si.dwPageSize;
+ initialized_ = true;
+ }
+}
+
+unsigned char* PreamblePatcher::AllocPreambleBlockNear(void* target) {
+ PreamblePage* preamble_page = preamble_pages_;
+ while (preamble_page != NULL) {
+ if (preamble_page->free_ != NULL) {
+ __int64 val = reinterpret_cast<__int64>(preamble_page) -
+ reinterpret_cast<__int64>(target);
+ if ((val > 0 && val + pagesize_ <= INT_MAX) ||
+ (val < 0 && val >= INT_MIN)) {
+ break;
+ }
+ }
+ preamble_page = preamble_page->next_;
+ }
+
+ // The free_ member of the page is used to store the next available block
+ // of memory to use or NULL if there are no blocks available, in which case
+ // we'll allocate a new page.
+ if (preamble_page == NULL || preamble_page->free_ == NULL) {
+ // Create a new preamble page and initialize the free list
+ preamble_page = reinterpret_cast<PreamblePage*>(AllocPageNear(target));
+ SIDESTEP_ASSERT(preamble_page != NULL && "Could not allocate page!");
+ void** pp = &preamble_page->free_;
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(preamble_page) +
+ MAX_PREAMBLE_STUB_SIZE;
+ unsigned char* limit = reinterpret_cast<unsigned char*>(preamble_page) +
+ pagesize_;
+ while (ptr < limit) {
+ *pp = ptr;
+ pp = reinterpret_cast<void**>(ptr);
+ ptr += MAX_PREAMBLE_STUB_SIZE;
+ }
+ *pp = NULL;
+ // Insert the new page into the list
+ preamble_page->magic_ = kPreamblePageMagic;
+ preamble_page->next_ = preamble_pages_;
+ preamble_pages_ = preamble_page;
+ }
+ unsigned char* ret = reinterpret_cast<unsigned char*>(preamble_page->free_);
+ preamble_page->free_ = *(reinterpret_cast<void**>(preamble_page->free_));
+ return ret;
+}
+
+void PreamblePatcher::FreePreambleBlock(unsigned char* block) {
+ SIDESTEP_ASSERT(block != NULL);
+ SIDESTEP_ASSERT(granularity_ != 0);
+ uintptr_t ptr = reinterpret_cast<uintptr_t>(block);
+ ptr -= ptr & (granularity_ - 1);
+ PreamblePage* preamble_page = reinterpret_cast<PreamblePage*>(ptr);
+ SIDESTEP_ASSERT(preamble_page->magic_ == kPreamblePageMagic);
+ *(reinterpret_cast<void**>(block)) = preamble_page->free_;
+ preamble_page->free_ = block;
+}
+
+void* PreamblePatcher::AllocPageNear(void* target) {
+ MEMORY_BASIC_INFORMATION mbi = { 0 };
+ if (!::VirtualQuery(target, &mbi, sizeof(mbi))) {
+ SIDESTEP_ASSERT(false && "VirtualQuery failed on target address");
+ return 0;
+ }
+ if (initialized_ == false) {
+ PreamblePatcher::Initialize();
+ SIDESTEP_ASSERT(initialized_);
+ }
+ void* pv = NULL;
+ unsigned char* allocation_base = reinterpret_cast<unsigned char*>(
+ mbi.AllocationBase);
+ __int64 i = 1;
+ bool high_target = reinterpret_cast<__int64>(target) > UINT_MAX;
+ while (pv == NULL) {
+ __int64 val = reinterpret_cast<__int64>(allocation_base) -
+ (i * granularity_);
+ if (high_target &&
+ reinterpret_cast<__int64>(target) - val > INT_MAX) {
+ // We're further than 2GB from the target
+ break;
+ } else if (val <= 0) {
+ // Less than 0
+ break;
+ }
+ pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base -
+ (i++ * granularity_)),
+ pagesize_, MEM_COMMIT | MEM_RESERVE,
+ PAGE_EXECUTE_READWRITE);
+ }
+
+ // We couldn't allocate low, try to allocate high
+ if (pv == NULL) {
+ i = 1;
+ // Round up to the next multiple of page granularity
+ allocation_base = reinterpret_cast<unsigned char*>(
+ (reinterpret_cast<__int64>(target) &
+ (~(granularity_ - 1))) + granularity_);
+ while (pv == NULL) {
+ __int64 val = reinterpret_cast<__int64>(allocation_base) +
+ (i * granularity_) - reinterpret_cast<__int64>(target);
+ if (val > INT_MAX || val < 0) {
+ // We're too far or we overflowed
+ break;
+ }
+ pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base +
+ (i++ * granularity_)),
+ pagesize_, MEM_COMMIT | MEM_RESERVE,
+ PAGE_EXECUTE_READWRITE);
+ }
+ }
+ return pv;
+}
+
+bool PreamblePatcher::IsShortConditionalJump(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+}
+
+bool PreamblePatcher::IsNearConditionalJump(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return *(target) == 0xf && (*(target + 1) & 0x80) == 0x80 &&
+ instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeJump(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return *(target) == 0xe9 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsNearAbsoluteCall(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return *(target) == 0xff && (*(target + 1) & 0x10) == 0x10 &&
+ instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeCall(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return *(target) == 0xe8 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsMovWithDisplacement(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ // In this case, the ModRM byte's mod field will be 0 and r/m will be 101b (5)
+ return instruction_size == 7 && *target == 0x48 && *(target + 1) == 0x8b &&
+ (*(target + 2) >> 6) == 0 && (*(target + 2) & 0x7) == 5;
+}
+
+SideStepError PreamblePatcher::PatchShortConditionalJump(
+ unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size) {
+ // The rel8 displacement is signed, so cast before the pointer math.
+ unsigned char* original_jump_dest = (source + 2) +
+ static_cast<signed char>(source[1]);
+ unsigned char* stub_jump_from = target + 6;
+ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+ SIDESTEP_ASSERT(false &&
+ "Unable to fix up short jump because target"
+ " is too far away.");
+ return SIDESTEP_JUMP_INSTRUCTION;
+ }
+
+ *target_bytes = 6;
+ if (target_size > *target_bytes) {
+ // Convert the short jump to a near jump.
+ //
+ // 0f 8x xx xx xx xx = Jcc rel32off
+ unsigned short jmpcode = ((0x80 | (source[0] & 0xf)) << 8) | 0x0f;
+ memcpy(reinterpret_cast<void*>(target),
+ reinterpret_cast<void*>(&jmpcode), 2);
+ memcpy(reinterpret_cast<void*>(target + 2),
+ reinterpret_cast<void*>(&fixup_jump_offset), 4);
+ }
+
+ return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchNearJumpOrCall(
+ unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size) {
+ SIDESTEP_ASSERT(instruction_size == 5 || instruction_size == 6);
+ unsigned int jmp_offset_in_instruction = instruction_size == 5 ? 1 : 2;
+ unsigned char* original_jump_dest = reinterpret_cast<unsigned char *>(
+ reinterpret_cast<__int64>(source + instruction_size) +
+ *(reinterpret_cast<int*>(source + jmp_offset_in_instruction)));
+ unsigned char* stub_jump_from = target + instruction_size;
+ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+ SIDESTEP_ASSERT(false &&
+ "Unable to fix up near jump because target"
+ " is too far away.");
+ return SIDESTEP_JUMP_INSTRUCTION;
+ }
+
+ if (fixup_jump_offset < SCHAR_MAX && fixup_jump_offset > SCHAR_MIN) {
+ *target_bytes = 2;
+ if (target_size > *target_bytes) {
+ // If the new offset is in range, use a short jump instead of a near jump.
+ if (source[0] == ASM_JCC32REL_0 &&
+ (source[1] & ASM_JCC32REL_1_MASK) == ASM_JCC32REL_1_MASK) {
+ unsigned short jmpcode = (static_cast<unsigned char>(
+ fixup_jump_offset) << 8) | (0x70 | (source[1] & 0xf));
+ memcpy(reinterpret_cast<void*>(target),
+ reinterpret_cast<void*>(&jmpcode),
+ 2);
+ } else {
+ target[0] = ASM_JMP8REL;
+ target[1] = static_cast<unsigned char>(fixup_jump_offset);
+ }
+ }
+ } else {
+ *target_bytes = instruction_size;
+ if (target_size > *target_bytes) {
+ memcpy(reinterpret_cast<void*>(target),
+ reinterpret_cast<void*>(source),
+ jmp_offset_in_instruction);
+ memcpy(reinterpret_cast<void*>(target + jmp_offset_in_instruction),
+ reinterpret_cast<void*>(&fixup_jump_offset),
+ 4);
+ }
+ }
+
+ return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchMovWithDisplacement(
+ unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size) {
+ SIDESTEP_ASSERT(instruction_size == 7);
+ const int mov_offset_in_instruction = 3; // 0x48 0x8b 0x0d <offset>
+ unsigned char* original_mov_dest = reinterpret_cast<unsigned char*>(
+ reinterpret_cast<__int64>(source + instruction_size) +
+ *(reinterpret_cast<int*>(source + mov_offset_in_instruction)));
+ unsigned char* stub_mov_from = target + instruction_size;
+ __int64 fixup_mov_offset = original_mov_dest - stub_mov_from;
+ if (fixup_mov_offset > INT_MAX || fixup_mov_offset < INT_MIN) {
+ SIDESTEP_ASSERT(false &&
+ "Unable to fix up near MOV because target is too far away.");
+ return SIDESTEP_UNEXPECTED;
+ }
+ *target_bytes = instruction_size;
+ if (target_size > *target_bytes) {
+ memcpy(reinterpret_cast<void*>(target),
+ reinterpret_cast<void*>(source),
+ mov_offset_in_instruction);
+ memcpy(reinterpret_cast<void*>(target + mov_offset_in_instruction),
+ reinterpret_cast<void*>(&fixup_mov_offset),
+ 4);
+ }
+ return SIDESTEP_SUCCESS;
+}
+
}; // namespace sidestep
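
Aside: AllocPreambleBlockNear and FreePreambleBlock above implement an intrusive free list, where each unused block's first pointer-sized bytes store the address of the next free block, so no side table is needed. A stripped-down sketch of the same pattern over a plain buffer (hypothetical names, no VirtualAlloc):

    #include <cstddef>

    // Sketch: fixed-size blocks in one page, chained through themselves.
    struct BlockPool {
      void* free_head;

      void Init(unsigned char* page, size_t page_size, size_t block_size) {
        free_head = NULL;
        for (size_t off = 0; off + block_size <= page_size; off += block_size) {
          void** block = reinterpret_cast<void**>(page + off);
          *block = free_head;   // link the block into the free list
          free_head = block;
        }
      }
      void* Alloc() {           // pop the head of the list, or NULL if empty
        if (free_head == NULL) return NULL;
        void* block = free_head;
        free_head = *reinterpret_cast<void**>(block);
        return block;
      }
      void Free(void* block) {  // push the block back onto the list
        *reinterpret_cast<void**>(block) = free_head;
        free_head = block;
      }
    };
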
diff --git a/src/windows/preamble_patcher.h b/src/windows/preamble_patcher.h
index 0028e4e..50946bd 100644
--- a/src/windows/preamble_patcher.h
+++ b/src/windows/preamble_patcher.h
@@ -29,6 +29,7 @@
*
* ---
* Author: Joi Sigurdsson
+ * Author: Scott Francis
*
* Definition of PreamblePatcher
*/
@@ -47,7 +48,25 @@
// bytes of the function. Considering the worst case scenario, we need 4
// bytes + the max instruction size + 5 more bytes for our jump back to
// the original code. With that in mind, 32 is a good number :)
+#ifdef _M_X64
+// In 64-bit mode we may need more room. In 64-bit mode all jumps must be
+// within +/-2GB of RIP. Because of this limitation we may need to use a
+// trampoline to jump to the replacement function if it is further than 2GB
+// away from the target. The trampoline is 13 bytes.
+//
+// So 4 bytes + max instruction size (17 bytes) + 5 bytes to jump back to the
+// original code + trampoline size. 64 bytes is a nice number :-)
+#define MAX_PREAMBLE_STUB_SIZE (64)
+#else
#define MAX_PREAMBLE_STUB_SIZE (32)
+#endif
+
+// Determines if this is a 64-bit binary.
+#ifdef _M_X64
+static const bool kIs64BitBinary = true;
+#else
+static const bool kIs64BitBinary = false;
+#endif
namespace sidestep {
@@ -68,6 +87,8 @@ enum SideStepError {
#define SIDESTEP_TO_HRESULT(error) \
MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error)
+class DeleteUnsignedCharArray;
+
// Implements a patching mechanism that overwrites the first few bytes of
// a function preamble with a jump to our hook function, which is then
// able to call the original function via a specially-made preamble-stub
@@ -287,7 +308,55 @@ class PreamblePatcher {
return (T)ResolveTargetImpl((unsigned char*)target_function, NULL);
}
+ // Allocates a block of memory of size MAX_PREAMBLE_STUB_SIZE that is as
+ // close (within 2GB) as possible to target. This is done to ensure that
+ // we can perform a relative jump from target to a trampoline if the
+ // replacement function is > +/-2GB from target. This means that we only need
+ // to patch 5 bytes in the target function.
+ //
+ // @param target Pointer to target function.
+ //
+ // @return Returns a block of memory of size MAX_PREAMBLE_STUB_SIZE that can
+ // be used to store a function preamble block.
+ static unsigned char* AllocPreambleBlockNear(void* target);
+
+ // Frees a block allocated by AllocPreambleBlockNear.
+ //
+ // @param block Block that was returned by AllocPreambleBlockNear.
+ static void FreePreambleBlock(unsigned char* block);
+
private:
+ friend class DeleteUnsignedCharArray;
+
+ // Used to store data allocated for preamble stubs
+ struct PreamblePage {
+ unsigned int magic_;
+ PreamblePage* next_;
+ // This member points to a linked list of free blocks within the page
+ // or NULL if at the end
+ void* free_;
+ };
+
+ // In 64-bit mode, the replacement function must be within 2GB of the original
+ // target in order to only require 5 bytes for the function patch. To meet
+ // this requirement we're creating an allocator within this class to
+ // allocate blocks that are within 2GB of a given target. This member is the
+ // head of a linked list of pages used to allocate blocks that are within
+ // 2GB of the target.
+ static PreamblePage* preamble_pages_;
+
+ // Page granularity
+ static long granularity_;
+
+ // Page size
+ static long pagesize_;
+
+ // Determines if the patcher has been initialized.
+ static bool initialized_;
+
+ // Used to initialize static members.
+ static void Initialize();
+
// Patches a function by overwriting its first few bytes with
// a jump to a different function. This is similar to the RawPatch
// function except that it uses the stub allocated by the caller
@@ -318,7 +387,7 @@ class PreamblePatcher {
// @return An error code indicating the result of patching.
static SideStepError RawPatchWithStubAndProtections(
void* target_function,
- void *replacement_function,
+ void* replacement_function,
unsigned char* preamble_stub,
unsigned long stub_size,
unsigned long* bytes_needed);
@@ -348,7 +417,7 @@ class PreamblePatcher {
//
// @return An error code indicating the result of patching.
static SideStepError RawPatchWithStub(void* target_function,
- void *replacement_function,
+ void* replacement_function,
unsigned char* preamble_stub,
unsigned long stub_size,
unsigned long* bytes_needed);
@@ -365,12 +434,175 @@ class PreamblePatcher {
// target_function, we get to the address stop, we return
// immediately, the address that jumps to stop_before.
//
+ // @param stop_before_trampoline When following JMP instructions from
+ // target_function, stop before a trampoline is detected. See comment in
+ // PreamblePatcher::RawPatchWithStub for more information. This parameter
+ // has no effect in 32-bit mode.
+ //
// @return Either target_function (the input parameter), or if
// target_function's body consists entirely of a JMP instruction,
// the address it JMPs to (or more precisely, the address at the end
// of a chain of JMPs).
static void* ResolveTargetImpl(unsigned char* target_function,
- unsigned char* stop_before);
+ unsigned char* stop_before,
+ bool stop_before_trampoline = false);
+
+ // Helper routine that attempts to allocate a page as close (within 2GB)
+ // as possible to target.
+ //
+ // @param target Pointer to target function.
+ //
+ // @return Returns an address that is within 2GB of target.
+ static void* AllocPageNear(void* target);
+
+ // Helper routine that determines if a target instruction is a short
+ // conditional jump.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a short conditional jump.
+ static bool IsShortConditionalJump(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that determines if a target instruction is a near
+ // conditional jump.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a near conditional jump.
+ static bool IsNearConditionalJump(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that determines if a target instruction is a near
+ // relative jump.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a near relative jump.
+ static bool IsNearRelativeJump(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that determines if a target instruction is a near
+ // absolute call.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a near absolute call.
+ static bool IsNearAbsoluteCall(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that determines if a target instruction is a near
+ // relative call.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a near relative call.
+ static bool IsNearRelativeCall(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that determines if a target instruction is a 64-bit MOV
+ // that uses a RIP-relative displacement.
+ //
+ // @param target Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction in bytes.
+ //
+ // @return Returns true if the instruction is a MOV with displacement.
+ static bool IsMovWithDisplacement(unsigned char* target,
+ unsigned int instruction_size);
+
+ // Helper routine that converts a short conditional jump instruction
+ // to a near conditional jump in a target buffer. Note that the target
+ // buffer must be within 2GB of the source for the near jump to work.
+ //
+ // A short conditional jump instruction is in the format:
+ // 7x xx = Jcc rel8off
+ //
+ // @param source Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction.
+ //
+ // @param target Target buffer to write the new instruction.
+ //
+ // @param target_bytes Out parameter that receives the size of the
+ // instruction written to the target buffer, in bytes.
+ //
+ // @param target_size Size of the target buffer.
+ //
+ // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+ static SideStepError PatchShortConditionalJump(unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size);
+
+ // Helper routine that converts various jump-like instructions to
+ // corresponding instructions in the target buffer, fixing up the
+ // relative offsets they contain so that they still point to the
+ // original destinations. As with PatchShortConditionalJump, the
+ // target buffer must be within 2GB of the source.
+ //
+ // We currently handle the following instructions:
+ //
+ // E9 xx xx xx xx = JMP rel32off
+ // 0F 8x xx xx xx xx = Jcc rel32off
+ // FF /2 xx xx xx xx = CALL reg/mem32/mem64
+ // E8 xx xx xx xx = CALL rel32off
+ //
+ // It should not be hard to update this function to support other
+ // instructions that jump to relative targets.
+ //
+ // @param source Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction.
+ //
+ // @param target Target buffer to write the new instruction.
+ //
+ // @param target_bytes Out parameter that receives the size of the
+ // instruction written to the target buffer, in bytes.
+ //
+ // @param target_size Size of the target buffer.
+ //
+ // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+ static SideStepError PatchNearJumpOrCall(unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size);
+
+ // Helper routine that patches a 64-bit MOV instruction with a RIP-relative
+ // displacement. The target buffer must be within 2GB of the source.
+ //
+ // 48 8B 0D xx xx xx xx = MOV rcx, [RIP + disp32]
+ //
+ // @param source Pointer to instruction.
+ //
+ // @param instruction_size Size of the instruction.
+ //
+ // @param target Target buffer to write the new instruction.
+ //
+ // @param target_bytes Out parameter that receives the size of the
+ // instruction written to the target buffer, in bytes.
+ //
+ // @param target_size Size of the target buffer.
+ //
+ // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+ static SideStepError PatchMovWithDisplacement(unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size);
};
}; // namespace sidestep
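
For reference, example encodings that the Is* classifiers declared above are meant to match (byte values are illustrative):

    // 74 05                  JE  rel8        -> IsShortConditionalJump
    // 0F 84 10 00 00 00      JE  rel32       -> IsNearConditionalJump
    // E9 00 10 00 00         JMP rel32       -> IsNearRelativeJump
    // FF 15 xx xx xx xx      CALL [RIP+d32]  -> IsNearAbsoluteCall
    // E8 00 10 00 00         CALL rel32      -> IsNearRelativeCall
    // 48 8B 0D xx xx xx xx   MOV rcx,[RIP+d] -> IsMovWithDisplacement
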
diff --git a/src/windows/preamble_patcher_test.cc b/src/windows/preamble_patcher_test.cc
new file mode 100644
index 0000000..2506cfb
--- /dev/null
+++ b/src/windows/preamble_patcher_test.cc
@@ -0,0 +1,356 @@
+/* Copyright (c) 2011, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ * Author: Scott Francis
+ *
+ * Unit tests for PreamblePatcher
+ */
+
+#include "preamble_patcher.h"
+#include "mini_disassembler.h"
+#pragma warning(push)
+#pragma warning(disable:4553)
+#include "auto_testing_hook.h"
+#pragma warning(pop)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+
+// Turning off all optimizations for this file, since the official build's
+// "Whole program optimization" seems to cause the TestPatchUsingDynamicStub
+// test to crash with an access violation. We debugged this and found
+// that the optimized code accesses a register that is clobbered by a call to
+// the hook function.
+#pragma optimize("", off)
+
+namespace {
+
+// Function for testing - this is what we patch
+//
+// NOTE: Because of the way the compiler optimizes this function in
+// release builds, we need to use a different input value every time we
+// call it within a function, otherwise the compiler will just reuse the
+// last calculated incremented value.
+int __declspec(noinline) IncrementNumber(int i) {
+#ifdef _M_X64
+ __int64 i2 = i + 1;
+ return (int) i2;
+#else
+ return i + 1;
+#endif
+}
+
+extern "C" int TooShortFunction(int);
+
+extern "C" int JumpShortCondFunction(int);
+
+extern "C" int JumpNearCondFunction(int);
+
+extern "C" int JumpAbsoluteFunction(int);
+
+extern "C" int CallNearRelativeFunction(int);
+
+typedef int (*IncrementingFunc)(int);
+IncrementingFunc original_function = NULL;
+
+int HookIncrementNumber(int i) {
+ SIDESTEP_ASSERT(original_function != NULL);
+ int incremented_once = original_function(i);
+ return incremented_once + 1;
+}
+
+// For the AutoTestingHook test, we can't use original_function, because
+// the AutoTestingHook encapsulates all of that.
+// This function "increments" by 10, just to set it apart from the other
+// functions.
+int __declspec(noinline) AutoHookIncrementNumber(int i) {
+ return i + 10;
+}
+
+}; // namespace
+
+namespace sidestep {
+
+bool TestDisassembler() {
+ unsigned int instruction_size = 0;
+ sidestep::MiniDisassembler disassembler;
+ void * target = reinterpret_cast<unsigned char *>(IncrementNumber);
+ void * new_target = PreamblePatcher::ResolveTarget(target);
+ if (target != new_target)
+ target = new_target;
+
+ while (1) {
+ sidestep::InstructionType instructionType = disassembler.Disassemble(
+ reinterpret_cast<unsigned char *>(target) + instruction_size,
+ instruction_size);
+ if (sidestep::IT_RETURN == instructionType) {
+ return true;
+ }
+ }
+}
+
+bool TestPatchWithLongJump() {
+ original_function = NULL;
+ void *p = ::VirtualAlloc(reinterpret_cast<void *>(0x0000020000000000), 4096,
+ MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ SIDESTEP_EXPECT_TRUE(p != NULL);
+ memset(p, 0xcc, 4096);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ (IncrementingFunc) p,
+ &original_function));
+ SIDESTEP_ASSERT((*original_function)(1) == 2);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ (IncrementingFunc) p,
+ original_function));
+ ::VirtualFree(p, 0, MEM_RELEASE);
+ return true;
+}
+
+bool TestPatchWithPreambleShortCondJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpShortCondFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpShortCondFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCondJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpNearCondFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpNearCondFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleAbsoluteJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpAbsoluteFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpAbsoluteFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCall() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(
+ CallNearRelativeFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(
+ CallNearRelativeFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchUsingDynamicStub() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &original_function));
+ SIDESTEP_EXPECT_TRUE(original_function);
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 4);
+ SIDESTEP_EXPECT_TRUE(original_function(3) == 4);
+
+ // Clearbox test to see that the function has been patched.
+ sidestep::MiniDisassembler disassembler;
+ unsigned int instruction_size = 0;
+ SIDESTEP_EXPECT_TRUE(sidestep::IT_JUMP == disassembler.Disassemble(
+ reinterpret_cast<unsigned char*>(IncrementNumber),
+ instruction_size));
+
+ // Since we patched IncrementNumber, its first statement is a
+ // jmp to the hook function. So verify that we now can not patch
+ // IncrementNumber because it starts with a jump.
+#if 0
+ IncrementingFunc dummy = NULL;
+ // TODO(joi@chromium.org): restore this test once flag is added to
+ // disable JMP following
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_JUMP_INSTRUCTION ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &dummy));
+
+ // This test disabled because code in preamble_patcher_with_stub.cc
+ // asserts before returning the error code -- so there is no way
+ // to get an error code here, in debug build.
+ dummy = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_FUNCTION_TOO_SMALL ==
+ sidestep::PreamblePatcher::Patch(TooShortFunction,
+ HookIncrementNumber,
+ &dummy));
+#endif
+
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool PatchThenUnpatch() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &original_function));
+ SIDESTEP_EXPECT_TRUE(original_function);
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 3);
+ SIDESTEP_EXPECT_TRUE(original_function(2) == 3);
+
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ HookIncrementNumber,
+ original_function));
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool AutoTestingHookTest() {
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+ // Inner scope, so we can test what happens when the AutoTestingHook
+ // goes out of scope
+ {
+ AutoTestingHook hook = MakeTestingHook(IncrementNumber,
+ AutoHookIncrementNumber);
+ (void) hook;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+ }
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool AutoTestingHookInContainerTest() {
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+ // Inner scope, so we can test what happens when the AutoTestingHook
+ // goes out of scope
+ {
+ AutoTestingHookHolder hook(MakeTestingHookHolder(IncrementNumber,
+ AutoHookIncrementNumber));
+ (void) hook;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+ }
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool TestPreambleAllocation() {
+ __int64 diff = 0;
+ void* p1 = reinterpret_cast<void*>(0x110000000);
+ void* p2 = reinterpret_cast<void*>(0x810000000);
+ unsigned char* b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+ SIDESTEP_EXPECT_TRUE(b1 != NULL);
+ diff = reinterpret_cast<__int64>(p1) - reinterpret_cast<__int64>(b1);
+ // Ensure blocks are within 2GB
+ SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+ unsigned char* b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+ SIDESTEP_EXPECT_TRUE(b2 != NULL);
+ diff = reinterpret_cast<__int64>(p2) - reinterpret_cast<__int64>(b2);
+ SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+
+ // Ensure we're reusing free blocks
+ unsigned char* b3 = b1;
+ unsigned char* b4 = b2;
+ PreamblePatcher::FreePreambleBlock(b1);
+ PreamblePatcher::FreePreambleBlock(b2);
+ b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+ SIDESTEP_EXPECT_TRUE(b1 == b3);
+ b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+ SIDESTEP_EXPECT_TRUE(b2 == b4);
+ PreamblePatcher::FreePreambleBlock(b1);
+ PreamblePatcher::FreePreambleBlock(b2);
+
+ return true;
+}
+
+bool UnitTests() {
+ return TestPatchWithPreambleNearRelativeCall() &&
+ TestPatchWithPreambleAbsoluteJump() &&
+ TestPatchWithPreambleNearRelativeCondJump() &&
+ TestPatchWithPreambleShortCondJump() &&
+ TestDisassembler() && TestPatchWithLongJump() &&
+ TestPatchUsingDynamicStub() && PatchThenUnpatch() &&
+ AutoTestingHookTest() && AutoTestingHookInContainerTest() &&
+ TestPreambleAllocation();
+}
+
+}; // namespace sidestep
+
+int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
+ if (size == 0) // not even room for a \0?
+ return -1; // not what C99 says to do, but what windows does
+ str[size-1] = '\0';
+ return _vsnprintf(str, size-1, format, ap);
+}
+
+int _tmain(int argc, _TCHAR* argv[])
+{
+ bool ret = sidestep::UnitTests();
+ printf("%s\n", ret ? "PASS" : "FAIL");
+ return ret ? 0 : -1;
+}
+
+#pragma optimize("", on)
diff --git a/src/windows/preamble_patcher_with_stub.cc b/src/windows/preamble_patcher_with_stub.cc
index 4eb391d..b0dc393 100644
--- a/src/windows/preamble_patcher_with_stub.cc
+++ b/src/windows/preamble_patcher_with_stub.cc
@@ -29,6 +29,7 @@
*
* ---
* Author: Joi Sigurdsson
+ * Author: Scott Francis
*
* Implementation of PreamblePatcher
*/
@@ -40,12 +41,20 @@
// Definitions of assembly statements we need
#define ASM_JMP32REL 0xE9
#define ASM_INT3 0xCC
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_REXW 0x48
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
+#define ASM_PUSH 0x68
+#define ASM_RET 0xC3
namespace sidestep {
SideStepError PreamblePatcher::RawPatchWithStub(
void* target_function,
- void *replacement_function,
+ void* replacement_function,
unsigned char* preamble_stub,
unsigned long stub_size,
unsigned long* bytes_needed) {
@@ -75,23 +84,52 @@ SideStepError PreamblePatcher::RawPatchWithStub(
// doing it atomically does not help if one of the other threads happens
// to have its eip in the middle of the bytes you change while you change
// them.
+ unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
+ unsigned int required_trampoline_bytes = 0;
+ const unsigned int kRequiredStubJumpBytes = 5;
+ const unsigned int kRequiredTargetPatchBytes = 5;
- // First, deal with a special case that we see with functions that
- // point into an IAT table (including functions linked statically
- // into the application): these function already starts with
- // ASM_JMP32REL. For instance, malloc() might be implemented as a
- // JMP to __malloc(). In that case, we replace the destination of
- // the JMP (__malloc), rather than the JMP itself (malloc). This
- // way we get the correct behavior no matter how malloc gets called.
- void *new_target = ResolveTarget(target_function);
- if (new_target != target_function) { // we're in the IAT case
- // I'd like to just say "target = new_target", but I can't,
- // because the new target will need to have its protections set.
- return RawPatchWithStubAndProtections(new_target, replacement_function,
- preamble_stub, stub_size,
- bytes_needed);
+ // Initialize the stub with INT3's just in case.
+ if (stub_size) {
+ memset(preamble_stub, 0xcc, stub_size);
+ }
+ if (kIs64BitBinary) {
+ // In 64-bit mode JMP instructions are always relative to RIP. If the
+ // replacement - target offset is > 2GB, we can't JMP to the replacement
+ // function. In this case, we're going to use a trampoline - that is,
+ // we're going to do a relative jump to a small chunk of code in the stub
+ // that will then do the absolute jump to the replacement function. By
+ // doing this, we only need to patch 5 bytes in the target function, as
+ // opposed to patching 12 bytes if we were to do an absolute jump.
+ //
+ // Note that the first byte of the trampoline is a NOP instruction. This
+ // is used as a trampoline signature that will be detected when unpatching
+ // the function.
+ //
+ // jmp <trampoline>
+ //
+ // trampoline:
+ // nop
+ // mov rax, <replacement_function>
+ // jmp rax
+ //
+ __int64 replacement_target_offset = reinterpret_cast<__int64>(
+ replacement_function) - reinterpret_cast<__int64>(target) - 5;
+ if (replacement_target_offset > INT_MAX
+ || replacement_target_offset < INT_MIN) {
+ // The stub needs to be within 2GB of the target for the trampoline to
+ // work!
+ __int64 trampoline_offset = reinterpret_cast<__int64>(preamble_stub)
+ - reinterpret_cast<__int64>(target) - 5;
+ if (trampoline_offset > INT_MAX || trampoline_offset < INT_MIN) {
+ // We're screwed.
+ SIDESTEP_ASSERT(false
+ && "Preamble stub is too far from target to patch.");
+ return SIDESTEP_UNEXPECTED;
+ }
+ required_trampoline_bytes = 13;
+ }
}
- unsigned char* target = reinterpret_cast<unsigned char*>(new_target);
// Let's disassemble the preamble of the target function to see if we can
// patch, and to see how much of the preamble we need to take. We need 5
@@ -99,42 +137,76 @@ SideStepError PreamblePatcher::RawPatchWithStub(
// instructions to get 5 bytes.
MiniDisassembler disassembler;
unsigned int preamble_bytes = 0;
- while (preamble_bytes < 5) {
+ unsigned int stub_bytes = 0;
+ while (preamble_bytes < kRequiredTargetPatchBytes) {
+ unsigned int cur_bytes = 0;
InstructionType instruction_type =
- disassembler.Disassemble(target + preamble_bytes, preamble_bytes);
+ disassembler.Disassemble(target + preamble_bytes, cur_bytes);
if (IT_JUMP == instruction_type) {
- SIDESTEP_ASSERT(false &&
- "Unable to patch because there is a jump instruction "
- "in the first 5 bytes.");
- return SIDESTEP_JUMP_INSTRUCTION;
+ unsigned int jump_bytes = 0;
+ SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION;
+ if (IsShortConditionalJump(target + preamble_bytes, cur_bytes)) {
+ jump_ret = PatchShortConditionalJump(target + preamble_bytes, cur_bytes,
+ preamble_stub + stub_bytes,
+ &jump_bytes,
+ stub_size - stub_bytes);
+ } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
+ IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
+ IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
+ IsNearRelativeCall(target + preamble_bytes, cur_bytes)) {
+ jump_ret = PatchNearJumpOrCall(target + preamble_bytes, cur_bytes,
+ preamble_stub + stub_bytes, &jump_bytes,
+ stub_size - stub_bytes);
+ }
+ if (jump_ret != SIDESTEP_SUCCESS) {
+ SIDESTEP_ASSERT(false &&
+ "Unable to patch because there is an unhandled branch "
+ "instruction in the initial preamble bytes.");
+ return SIDESTEP_JUMP_INSTRUCTION;
+ }
+ stub_bytes += jump_bytes;
} else if (IT_RETURN == instruction_type) {
SIDESTEP_ASSERT(false &&
"Unable to patch because function is too short");
return SIDESTEP_FUNCTION_TOO_SMALL;
- } else if (IT_GENERIC != instruction_type) {
+ } else if (IT_GENERIC == instruction_type) {
+ if (IsMovWithDisplacement(target + preamble_bytes, cur_bytes)) {
+ unsigned int mov_bytes = 0;
+ if (PatchMovWithDisplacement(target + preamble_bytes, cur_bytes,
+ preamble_stub + stub_bytes, &mov_bytes,
+ stub_size - stub_bytes)
+ != SIDESTEP_SUCCESS) {
+ return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+ }
+ stub_bytes += mov_bytes;
+ } else {
+ memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes),
+ reinterpret_cast<void*>(target + preamble_bytes), cur_bytes);
+ stub_bytes += cur_bytes;
+ }
+ } else {
SIDESTEP_ASSERT(false &&
"Disassembler encountered unsupported instruction "
"(either unused or unknown");
return SIDESTEP_UNSUPPORTED_INSTRUCTION;
}
+ preamble_bytes += cur_bytes;
}
if (NULL != bytes_needed)
- *bytes_needed = preamble_bytes + 5;
+ *bytes_needed = stub_bytes + kRequiredStubJumpBytes
+ + required_trampoline_bytes;
// Inv: cbPreamble is the number of bytes (at least 5) that we need to take
// from the preamble to have whole instructions that are 5 bytes or more
- // in size total. The size of the stub required is cbPreamble + size of
- // jmp (5)
- if (preamble_bytes + 5 > stub_size) {
+ // in size total. The size of the stub required is cbPreamble +
+ // kRequiredStubJumpBytes (5) + required_trampoline_bytes (0 or 13)
+ if (stub_bytes + kRequiredStubJumpBytes + required_trampoline_bytes
+ > stub_size) {
SIDESTEP_ASSERT(false);
return SIDESTEP_INSUFFICIENT_BUFFER;
}
- // First, copy the preamble that we will overwrite.
- memcpy(reinterpret_cast<void*>(preamble_stub),
- reinterpret_cast<void*>(target), preamble_bytes);
-
// Now, make a jmp instruction to the rest of the target function (minus the
// preamble bytes we moved into the stub) and copy it into our preamble-stub.
// find address to jump to, relative to next address after jmp instruction
@@ -144,16 +216,32 @@ SideStepError PreamblePatcher::RawPatchWithStub(
#endif
int relative_offset_to_target_rest
= ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) -
- (preamble_stub + preamble_bytes + 5));
+ (preamble_stub + stub_bytes + kRequiredStubJumpBytes));
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// jmp (Jump near, relative, displacement relative to next instruction)
- preamble_stub[preamble_bytes] = ASM_JMP32REL;
+ preamble_stub[stub_bytes] = ASM_JMP32REL;
// copy the address
- memcpy(reinterpret_cast<void*>(preamble_stub + preamble_bytes + 1),
+ memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes + 1),
reinterpret_cast<void*>(&relative_offset_to_target_rest), 4);
+ if (kIs64BitBinary && required_trampoline_bytes != 0) {
+ // Construct the trampoline
+ unsigned int trampoline_pos = stub_bytes + kRequiredStubJumpBytes;
+ preamble_stub[trampoline_pos] = ASM_NOP;
+ preamble_stub[trampoline_pos + 1] = ASM_REXW;
+ preamble_stub[trampoline_pos + 2] = ASM_MOVRAX_IMM;
+ memcpy(reinterpret_cast<void*>(preamble_stub + trampoline_pos + 3),
+ reinterpret_cast<void*>(&replacement_function),
+ sizeof(void *));
+ preamble_stub[trampoline_pos + 11] = ASM_JMP;
+ preamble_stub[trampoline_pos + 12] = ASM_JMP_RAX;
+
+ // Now update replacement_function to point to the trampoline
+ replacement_function = preamble_stub + trampoline_pos;
+ }
+
// Inv: preamble_stub points to assembly code that will execute the
// original function by first executing the first cbPreamble bytes of the
// preamble, then jumping to the rest of the function.
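
To summarize the layout built above (sizes taken from the code; offsets illustrative):

    // preamble_stub + 0            relocated preamble instructions
    //                              (stub_bytes bytes, covering >= 5
    //                              bytes of the target)
    // preamble_stub + stub_bytes   E9 xx xx xx xx    ; JMP back to
    //                                                ; target + preamble_bytes
    // -- 64-bit far case only --
    // preamble_stub + stub_bytes + 5
    //                              90                ; NOP (signature)
    //                              48 B8 <imm64>     ; MOV RAX, replacement
    //                              FF E0             ; JMP RAX
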
@@ -177,6 +265,7 @@ SideStepError PreamblePatcher::RawPatchWithStub(
// complete the jmp instruction
memcpy(reinterpret_cast<void*>(target + 1),
reinterpret_cast<void*>(&offset_to_replacement_function), 4);
+
// Set any remaining bytes that were moved to the preamble-stub to INT3 so
// as not to cause confusion (otherwise you might see some strange
// instructions if you look at the disassembly, or even invalid
@@ -184,8 +273,9 @@ SideStepError PreamblePatcher::RawPatchWithStub(
// some code calls into this portion of the code. If this happens, it
// means that this function cannot be patched using this patcher without
// further thought.
- if (preamble_bytes > 5) {
- memset(reinterpret_cast<void*>(target + 5), ASM_INT3, preamble_bytes - 5);
+ if (preamble_bytes > kRequiredTargetPatchBytes) {
+ memset(reinterpret_cast<void*>(target + kRequiredTargetPatchBytes),
+ ASM_INT3, preamble_bytes - kRequiredTargetPatchBytes);
}
// Inv: The memory pointed to by target_function now points to a relative
@@ -193,7 +283,13 @@ SideStepError PreamblePatcher::RawPatchWithStub(
// stub contains the first stub_size bytes of the original target
// function's preamble code, followed by a relative jump back to the next
// instruction after the first cbPreamble bytes.
-
+ //
+ // In 64-bit mode the memory pointed to by target_function *may* point to a
+ // relative jump instruction that jumps to a trampoline which will then
+ // perform an absolute jump to the replacement function. The preamble stub
+ // still contains the original target function's preamble code, followed by a
+ // jump back to the instructions after the first preamble bytes.
+ //
return SIDESTEP_SUCCESS;
}
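
A minimal sketch of emitting the 13-byte trampoline described above into an executable buffer (hypothetical helper, mirroring the constants defined at the top of this file):

    #include <cstring>

    // Sketch: NOP signature, MOV RAX imm64, JMP RAX; 13 bytes total.
    inline void EmitTrampoline(unsigned char* buf, void* replacement) {
      buf[0] = 0x90;                     // ASM_NOP, trampoline signature
      buf[1] = 0x48;                     // ASM_REXW
      buf[2] = 0xB8;                     // ASM_MOVRAX_IMM
      memcpy(buf + 3, &replacement, 8);  // the 64-bit immediate
      buf[11] = 0xFF;                    // ASM_JMP ...
      buf[12] = 0xE0;                    // ... ASM_JMP_RAX
    }
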
diff --git a/src/windows/shortproc.asm b/src/windows/shortproc.asm
new file mode 100644
index 0000000..757fd43
--- /dev/null
+++ b/src/windows/shortproc.asm
@@ -0,0 +1,167 @@
+; Copyright (c) 2011, Google Inc.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above
+; copyright notice, this list of conditions and the following disclaimer
+; in the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Google Inc. nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+; ---
+; Author: Scott Francis
+;
+; Test functions used by the PreamblePatcher unit tests
+
+.CODE
+
+TooShortFunction PROC
+ ret
+TooShortFunction ENDP
+
+JumpShortCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+jumpspot:
+ nop
+ nop
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpShortCondFunction ENDP
+
+JumpNearCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpNearCondFunction ENDP
+
+JumpAbsoluteFunction PROC
+ test cl, 1
+ jmp jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpAbsoluteFunction ENDP
+
+CallNearRelativeFunction PROC
+ test cl, 1
+ call TooShortFunction
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ nop
+ nop
+ nop
+ ret
+CallNearRelativeFunction ENDP
+
+END
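
A note on the long runs of filler above: each mov rdx, 0ffff1111H assembles to a single multi-byte instruction (10 bytes if ml64 picks the mov r64, imm64 form), so the runs both pad the functions well past the 5 bytes the patcher overwrites and push jumpspot far enough away that the assembler must emit near (rel32) rather than short branch forms. A sketch of the patched window for JumpNearCondFunction (encodings assumed):

    ; test cl, 1      ; 3 bytes (F6 C1 01)
    ; jnz jumpspot    ; 6 bytes (0F 85 rel32) -- straddles the 5-byte patch
    ;                 ; boundary, so PatchNearJumpOrCall must relocate it
    ;                 ; into the stub and fix up its displacement
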