32 files changed, 1916 insertions, 1359 deletions
@@ -1,4 +1,14 @@
-== 23 December 2011 ==
+== 24 January 2012 ==
+
+The `google-perftools` Google Code page has been renamed to
+`gperftools`, in preparation for the project being renamed to
+`gperftools`.  In the coming weeks, I'll be stepping down as
+maintainer for the perftools project, and as part of that Google is
+relinquishing ownership of the project; it will now be entirely
+community run.  The name change reflects that shift.  The 'g' in
+'gperftools' stands for 'great'.  :-)
+
+=== 23 December 2011 ===
 
 I've just released perftools 1.9.1
 
@@ -7,7 +17,7 @@
 ARM.  If you are not compiling on ARM, or have successfully compiled
 perftools 1.9, there is no need to upgrade.
 
-== 22 December 2011 ==
+=== 22 December 2011 ===
 
 I've just released perftools 1.9
 
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for google-perftools 1.9.1.
+# Generated by GNU Autoconf 2.65 for google-perftools 1.10.
 #
 # Report bugs to <opensource@google.com>.
 #
@@ -701,8 +701,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='google-perftools'
 PACKAGE_TARNAME='google-perftools'
-PACKAGE_VERSION='1.9.1'
-PACKAGE_STRING='google-perftools 1.9.1'
+PACKAGE_VERSION='1.10'
+PACKAGE_STRING='google-perftools 1.10'
 PACKAGE_BUGREPORT='opensource@google.com'
 PACKAGE_URL=''
@@ -1474,7 +1474,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures google-perftools 1.9.1 to adapt to many kinds of systems.
+\`configure' configures google-perftools 1.10 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1545,7 +1545,7 @@ fi
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-    short | recursive ) echo "Configuration of google-perftools 1.9.1:";;
+    short | recursive ) echo "Configuration of google-perftools 1.10:";;
   esac
   cat <<\_ACEOF
@@ -1655,7 +1655,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-google-perftools configure 1.9.1
+google-perftools configure 1.10
 generated by GNU Autoconf 2.65
 
 Copyright (C) 2009 Free Software Foundation, Inc.
@@ -2240,7 +2240,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by google-perftools $as_me 1.9.1, which was
+It was created by google-perftools $as_me 1.10, which was
 generated by GNU Autoconf 2.65.  Invocation command line was
 
   $ $0 $@
 
@@ -2985,7 +2985,7 @@ fi
 # Define the identity of the package.
  PACKAGE='google-perftools'
- VERSION='1.9.1'
+ VERSION='1.10'
 
 cat >>confdefs.h <<_ACEOF
@@ -17785,7 +17785,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by google-perftools $as_me 1.9.1, which was
+This file was extended by google-perftools $as_me 1.10, which was
 generated by GNU Autoconf 2.65.
Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
 
@@ -17851,7 +17851,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-google-perftools config.status 1.9.1
+google-perftools config.status 1.10
 configured by $0, generated by GNU Autoconf 2.65,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index ab25509..eb8ba7c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,7 +4,7 @@
 # make sure we're interpreted by some minimal autoconf
 AC_PREREQ(2.57)
-AC_INIT(google-perftools, 1.9.1, opensource@google.com)
+AC_INIT(google-perftools, 1.10, opensource@google.com)
 # Update this value for every release!  (A:B:C will map to foo.so.(A-C).C.B)
 # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
 TCMALLOC_SO_VERSION=3:0:3
 
diff --git a/doc/cpuprofile.html b/doc/cpuprofile.html
index f029e78..78ee2a6 100644
--- a/doc/cpuprofile.html
+++ b/doc/cpuprofile.html
@@ -50,9 +50,10 @@ for a given run of an executable:</p>
 <ol>
  <li> <p>Define the environment variable CPUPROFILE to the filename
-      to dump the profile to.  For instance, to profile
-      <code>/usr/local/bin/my_binary_compiled_with_libprofiler_so</code>:</p>
-      <pre>% env CPUPROFILE=/tmp/mybin.prof /usr/local/bin/my_binary_compiled_with_libprofiler_so</pre>
+      to dump the profile to.  For instance, if you had a version of
+      <code>/bin/ls</code> that had been linked against libprofiler,
+      you could run:</p>
+      <pre>% env CPUPROFILE=ls.prof /bin/ls</pre>
 
  <li> <p>In your code, bracket the code you want profiled in calls to
       <code>ProfilerStart()</code> and <code>ProfilerStop()</code>.
 
diff --git a/src/base/atomicops-internals-arm-v6plus.h b/src/base/atomicops-internals-arm-v6plus.h
index e197cac..8d5b9b5 100644
--- a/src/base/atomicops-internals-arm-v6plus.h
+++ b/src/base/atomicops-internals-arm-v6plus.h
@@ -67,6 +67,10 @@ inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
     "ldrex   %1, [%3]\n"
     "mov     %0, #0\n"
     "teq     %1, %4\n"
+    // The following IT (if-then) instruction is needed for the subsequent
+    // conditional instruction STREXEQ when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+    "it      eq\n"
     "strexeq %0, %5, [%3]\n"
     : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
     : "r" (ptr), "Ir" (old_value), "r" (new_value)
@@ -186,7 +190,12 @@ inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
     "ldrexd  %1, [%3]\n"
     "mov     %0, #0\n"
     "teq     %Q1, %Q4\n"
+    // The following IT (if-then) instructions are needed for the subsequent
+    // conditional instructions when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+    "it      eq\n"
     "teqeq   %R1, %R4\n"
+    "it      eq\n"
     "strexdeq %0, %5, [%3]\n"
     : "=&r" (res), "=&r" (oldval), "+Q" (*ptr)
     : "r" (ptr), "Ir" (old_value), "r" (new_value)
 
diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h
index f52f16e..99dac9e 100644
--- a/src/base/linux_syscall_support.h
+++ b/src/base/linux_syscall_support.h
@@ -69,6 +69,63 @@
  * This file defines a few internal symbols that all start with "LSS_".
  * Do not access these symbols from outside this file.  They are not part
  * of the supported API.
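
The doc/cpuprofile.html hunk above also mentions bracketing code in
ProfilerStart()/ProfilerStop() calls.  A minimal sketch of that usage,
assuming the perftools-1.x header location <google/profiler.h> and a
-lprofiler link; the output name demo.prof and the busy_work function
are invented for illustration:

  #include <google/profiler.h>   // ProfilerStart(), ProfilerStop()

  // Something CPU-bound for the sampling profiler to see.
  static long busy_work() {
    long sum = 0;
    for (int i = 0; i < 100000000; ++i) sum += i % 7;
    return sum;
  }

  int main() {
    ProfilerStart("demo.prof");  // samples from here on go to ./demo.prof
    long result = busy_work();   // only this region is profiled
    ProfilerStop();              // flush and close the profile
    return result == 0;          // use the result so the loop isn't elided
  }

The resulting file can then be examined with pprof, e.g. "pprof ./a.out demo.prof".
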
+ * + * NOTE: This is a stripped down version of the official opensource + * version of linux_syscall_support.h, which lives at + * http://code.google.com/p/linux-syscall-support/ + * It includes only the syscalls that are used in perftools, plus a + * few extra. Here's the breakdown: + * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u + * sys__exit( + * sys_clone( + * sys_close( + * sys_fcntl( + * sys_fstat( + * sys_futex( + * sys_futex1( + * sys_getcpu( + * sys_getdents( + * sys_getppid( + * sys_gettid( + * sys_lseek( + * sys_mmap( + * sys_mremap( + * sys_munmap( + * sys_open( + * sys_pipe( + * sys_prctl( + * sys_ptrace( + * sys_ptrace_detach( + * sys_read( + * sys_sched_yield( + * sys_sigaction( + * sys_sigaltstack( + * sys_sigdelset( + * sys_sigfillset( + * sys_sigprocmask( + * sys_socket( + * sys_stat( + * sys_waitpid( + * 2) These are used as subroutines of the above: + * sys_getpid -- gettid + * sys_kill -- ptrace_detach + * sys_restore -- sigaction + * sys_restore_rt -- sigaction + * sys_socketcall -- socket + * sys_wait4 -- waitpid + * 3) I left these in even though they're not used. They either + * complement the above (write vs read) or are variants (rt_sigaction): + * sys_fstat64 + * sys_getdents64 + * sys_llseek + * sys_mmap2 + * sys_openat + * sys_rt_sigaction + * sys_rt_sigprocmask + * sys_sigaddset + * sys_sigemptyset + * sys_stat64 + * sys_write */ #ifndef SYS_LINUX_SYSCALL_SUPPORT_H #define SYS_LINUX_SYSCALL_SUPPORT_H @@ -154,36 +211,6 @@ struct kernel_dirent { char d_name[256]; }; -/* include/linux/uio.h */ -struct kernel_iovec { - void *iov_base; - unsigned long iov_len; -}; - -/* include/linux/socket.h */ -struct kernel_msghdr { - void *msg_name; - int msg_namelen; - struct kernel_iovec*msg_iov; - unsigned long msg_iovlen; - void *msg_control; - unsigned long msg_controllen; - unsigned msg_flags; -}; - -/* include/asm-generic/poll.h */ -struct kernel_pollfd { - int fd; - short events; - short revents; -}; - -/* include/linux/resource.h */ -struct kernel_rlimit { - unsigned long rlim_cur; - unsigned long rlim_max; -}; - /* include/linux/time.h */ struct kernel_timespec { long tv_sec; @@ -274,12 +301,6 @@ struct kernel_sigaction { #endif }; -/* include/linux/socket.h */ -struct kernel_sockaddr { - unsigned short sa_family; - char sa_data[14]; -}; - /* include/asm-{arm,i386,mips,ppc}/stat.h */ #ifdef __mips__ #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -449,74 +470,6 @@ struct kernel_stat { }; #endif -/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */ -#ifdef __mips__ -#if _MIPS_SIM != _MIPS_SIM_ABI64 -struct kernel_statfs64 { - unsigned long f_type; - unsigned long f_bsize; - unsigned long f_frsize; - unsigned long __pad; - unsigned long long f_blocks; - unsigned long long f_bfree; - unsigned long long f_files; - unsigned long long f_ffree; - unsigned long long f_bavail; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_spare[6]; -}; -#endif -#elif !defined(__x86_64__) -struct kernel_statfs64 { - unsigned long f_type; - unsigned long f_bsize; - unsigned long long f_blocks; - unsigned long long f_bfree; - unsigned long long f_bavail; - unsigned long long f_files; - unsigned long long f_ffree; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_frsize; - unsigned long f_spare[5]; -}; -#endif - -/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */ -#ifdef __mips__ -struct kernel_statfs { - long f_type; - long f_bsize; - long f_frsize; - long f_blocks; - long f_bfree; - long f_files; - long 
f_ffree; - long f_bavail; - struct { int val[2]; } f_fsid; - long f_namelen; - long f_spare[6]; -}; -#else -struct kernel_statfs { - /* x86_64 actually defines all these fields as signed, whereas all other */ - /* platforms define them as unsigned. Leaving them at unsigned should not */ - /* cause any problems. */ - unsigned long f_type; - unsigned long f_bsize; - unsigned long f_blocks; - unsigned long f_bfree; - unsigned long f_bavail; - unsigned long f_files; - unsigned long f_ffree; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_frsize; - unsigned long f_spare[5]; -}; -#endif - /* Definitions missing from the standard header files */ #ifndef O_DIRECTORY @@ -526,12 +479,6 @@ struct kernel_statfs { #define O_DIRECTORY 0200000 #endif #endif -#ifndef NT_PRXFPREG -#define NT_PRXFPREG 0x46e62b7f -#endif -#ifndef PTRACE_GETFPXREGS -#define PTRACE_GETFPXREGS ((enum __ptrace_request)18) -#endif #ifndef PR_GET_DUMPABLE #define PR_GET_DUMPABLE 3 #endif @@ -553,46 +500,11 @@ struct kernel_statfs { #ifndef SA_RESTORER #define SA_RESTORER 0x04000000 #endif -#ifndef CPUCLOCK_PROF -#define CPUCLOCK_PROF 0 -#endif -#ifndef CPUCLOCK_VIRT -#define CPUCLOCK_VIRT 1 -#endif -#ifndef CPUCLOCK_SCHED -#define CPUCLOCK_SCHED 2 -#endif -#ifndef CPUCLOCK_PERTHREAD_MASK -#define CPUCLOCK_PERTHREAD_MASK 4 -#endif -#ifndef MAKE_PROCESS_CPUCLOCK -#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ - ((~(int)(pid) << 3) | (int)(clock)) -#endif -#ifndef MAKE_THREAD_CPUCLOCK -#define MAKE_THREAD_CPUCLOCK(tid, clock) \ - ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK)) -#endif #if defined(__i386__) -#ifndef __NR_setresuid -#define __NR_setresuid 164 -#define __NR_setresgid 170 -#endif #ifndef __NR_rt_sigaction #define __NR_rt_sigaction 174 #define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigsuspend 179 -#endif -#ifndef __NR_pread64 -#define __NR_pread64 180 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 181 -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit 191 #endif #ifndef __NR_stat64 #define __NR_stat64 195 @@ -600,121 +512,43 @@ struct kernel_statfs { #ifndef __NR_fstat64 #define __NR_fstat64 197 #endif -#ifndef __NR_setresuid32 -#define __NR_setresuid32 208 -#define __NR_setresgid32 210 -#endif -#ifndef __NR_setfsuid32 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#endif #ifndef __NR_getdents64 #define __NR_getdents64 220 #endif #ifndef __NR_gettid #define __NR_gettid 224 #endif -#ifndef __NR_readahead -#define __NR_readahead 225 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 226 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 227 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 229 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 230 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 232 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 233 -#endif #ifndef __NR_futex #define __NR_futex 240 #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 258 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 265 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 266 -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 268 -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 269 -#endif -#ifndef __NR_fadvise64_64 -#define __NR_fadvise64_64 272 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 289 -#endif -#ifndef __NR_ioprio_get 
-#define __NR_ioprio_get 290 -#endif #ifndef __NR_openat #define __NR_openat 295 #endif -#ifndef __NR_fstatat64 -#define __NR_fstatat64 300 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 301 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 317 -#endif #ifndef __NR_getcpu #define __NR_getcpu 318 #endif -#ifndef __NR_fallocate -#define __NR_fallocate 324 -#endif /* End of i386 definitions */ #elif defined(__arm__) #ifndef __syscall #if defined(__thumb__) || defined(__ARM_EABI__) -#define __SYS_REG(name) register long __sysreg __asm__("r7") = __NR_##name; -#define __SYS_REG_LIST(regs...) "r" (__sysreg) , ##regs +#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; +#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs #define __syscall(name) "swi\t0" +#define __syscall_safe(name) \ + "push {r7}\n" \ + "mov r7,%[sysreg]\n" \ + __syscall(name)"\n" \ + "pop {r7}" #else #define __SYS_REG(name) #define __SYS_REG_LIST(regs...) regs #define __syscall(name) "swi\t" __sys1(__NR_##name) "" +#define __syscall_safe(name) __syscall(name) #endif #endif -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_SYSCALL_BASE + 164) -#define __NR_setresgid (__NR_SYSCALL_BASE + 170) -#endif #ifndef __NR_rt_sigaction #define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) #define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) -#endif -#ifndef __NR_pread64 -#define __NR_pread64 (__NR_SYSCALL_BASE + 180) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) #endif #ifndef __NR_stat64 #define __NR_stat64 (__NR_SYSCALL_BASE + 195) @@ -722,169 +556,35 @@ struct kernel_statfs { #ifndef __NR_fstat64 #define __NR_fstat64 (__NR_SYSCALL_BASE + 197) #endif -#ifndef __NR_setresuid32 -#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) -#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) -#endif -#ifndef __NR_setfsuid32 -#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) -#endif #ifndef __NR_getdents64 #define __NR_getdents64 (__NR_SYSCALL_BASE + 217) #endif #ifndef __NR_gettid #define __NR_gettid (__NR_SYSCALL_BASE + 224) #endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_SYSCALL_BASE + 225) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_SYSCALL_BASE + 226) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_SYSCALL_BASE + 229) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_SYSCALL_BASE + 232) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) -#endif #ifndef __NR_futex #define __NR_futex (__NR_SYSCALL_BASE + 240) #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 (__NR_SYSCALL_BASE 
+ 267) -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_SYSCALL_BASE + 344) -#endif /* End of ARM definitions */ #elif defined(__x86_64__) -#ifndef __NR_pread64 -#define __NR_pread64 17 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 18 -#endif -#ifndef __NR_setresuid -#define __NR_setresuid 117 -#define __NR_setresgid 119 -#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif -#ifndef __NR_readahead -#define __NR_readahead 187 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 188 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 189 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 191 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 192 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 194 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 195 -#endif #ifndef __NR_futex #define __NR_futex 202 #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 203 -#define __NR_sched_getaffinity 204 -#endif #ifndef __NR_getdents64 #define __NR_getdents64 217 #endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 218 -#endif -#ifndef __NR_fadvise64 -#define __NR_fadvise64 221 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 228 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 229 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 251 -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 252 -#endif #ifndef __NR_openat #define __NR_openat 257 #endif -#ifndef __NR_newfstatat -#define __NR_newfstatat 262 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 263 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 279 -#endif -#ifndef __NR_fallocate -#define __NR_fallocate 285 -#endif /* End of x86-64 definitions */ #elif defined(__mips__) #if _MIPS_SIM == _MIPS_SIM_ABI32 -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 185) -#define __NR_setresgid (__NR_Linux + 190) -#endif #ifndef __NR_rt_sigaction #define __NR_rt_sigaction (__NR_Linux + 194) #define __NR_rt_sigprocmask (__NR_Linux + 195) -#define __NR_rt_sigpending (__NR_Linux + 196) -#define __NR_rt_sigsuspend (__NR_Linux + 199) -#endif -#ifndef __NR_pread64 -#define __NR_pread64 (__NR_Linux + 200) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_Linux + 201) #endif #ifndef __NR_stat64 #define __NR_stat64 (__NR_Linux + 213) @@ -898,245 +598,59 @@ struct kernel_statfs { #ifndef __NR_gettid #define __NR_gettid (__NR_Linux + 222) #endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 223) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 224) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 225) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 227) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 228) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 230) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 231) -#endif #ifndef __NR_futex #define __NR_futex (__NR_Linux + 238) #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 239) -#define __NR_sched_getaffinity (__NR_Linux + 240) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 252) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_Linux + 255) -#endif -#ifndef __NR_fstatfs64 
-#define __NR_fstatfs64 (__NR_Linux + 256) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 263) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 264) -#endif #ifndef __NR_openat #define __NR_openat (__NR_Linux + 288) #endif #ifndef __NR_fstatat #define __NR_fstatat (__NR_Linux + 293) #endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 294) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 308) -#endif #ifndef __NR_getcpu #define __NR_getcpu (__NR_Linux + 312) #endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 314) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 315) -#endif /* End of MIPS (old 32bit API) definitions */ #elif _MIPS_SIM == _MIPS_SIM_ABI64 -#ifndef __NR_pread64 -#define __NR_pread64 (__NR_Linux + 16) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_Linux + 17) -#endif -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_setresgid (__NR_Linux + 117) -#endif #ifndef __NR_gettid #define __NR_gettid (__NR_Linux + 178) #endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 179) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 180) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 181) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 183) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 184) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 186) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 187) -#endif #ifndef __NR_futex #define __NR_futex (__NR_Linux + 194) #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 212) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 222) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 223) -#endif #ifndef __NR_openat #define __NR_openat (__NR_Linux + 247) #endif #ifndef __NR_fstatat #define __NR_fstatat (__NR_Linux + 252) #endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 253) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 267) -#endif #ifndef __NR_getcpu #define __NR_getcpu (__NR_Linux + 271) #endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 273) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 274) -#endif /* End of MIPS (64bit API) definitions */ #else -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_setresgid (__NR_Linux + 117) -#endif #ifndef __NR_gettid #define __NR_gettid (__NR_Linux + 178) #endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 179) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 180) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 181) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 183) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 184) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 186) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 187) -#endif #ifndef __NR_futex #define __NR_futex (__NR_Linux + 194) #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity 
(__NR_Linux + 196) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 213) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_Linux + 217) -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 (__NR_Linux + 218) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 226) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 227) -#endif #ifndef __NR_openat #define __NR_openat (__NR_Linux + 251) #endif #ifndef __NR_fstatat #define __NR_fstatat (__NR_Linux + 256) #endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 257) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 271) -#endif #ifndef __NR_getcpu #define __NR_getcpu (__NR_Linux + 275) #endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 277) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 278) -#endif /* End of MIPS (new 32bit API) definitions */ #endif /* End of MIPS definitions */ #elif defined(__PPC__) -#ifndef __NR_setfsuid -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#endif -#ifndef __NR_setresuid -#define __NR_setresuid 164 -#define __NR_setresgid 169 -#endif #ifndef __NR_rt_sigaction #define __NR_rt_sigaction 173 #define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigsuspend 178 -#endif -#ifndef __NR_pread64 -#define __NR_pread64 179 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 180 -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit 190 -#endif -#ifndef __NR_readahead -#define __NR_readahead 191 #endif #ifndef __NR_stat64 #define __NR_stat64 195 @@ -1150,67 +664,12 @@ struct kernel_statfs { #ifndef __NR_gettid #define __NR_gettid 207 #endif -#ifndef __NR_setxattr -#define __NR_setxattr 209 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 210 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 212 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 213 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 215 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 216 -#endif #ifndef __NR_futex #define __NR_futex 221 #endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 232 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 246 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 247 -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 252 -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 253 -#endif -#ifndef __NR_fadvise64_64 -#define __NR_fadvise64_64 254 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 273 -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 274 -#endif #ifndef __NR_openat #define __NR_openat 286 #endif -#ifndef __NR_fstatat64 -#define __NR_fstatat64 291 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 292 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 301 -#endif #ifndef __NR_getcpu #define __NR_getcpu 302 #endif @@ -1315,6 +774,15 @@ struct kernel_statfs { } while (0) #endif #if defined(__i386__) + #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) + /* This only works for GCC-4.4 and above -- the first version to use + .cfi directives for dwarf unwind info. */ + #define CFI_ADJUST_CFA_OFFSET(adjust) \ + ".cfi_adjust_cfa_offset " #adjust "\n" + #else + #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ + #endif + /* In PIC mode (e.g. 
when building shared libraries), gcc for i386 * reserves ebx. Unfortunately, most distribution ship with implementations * of _syscallX() which clobber ebx. @@ -1327,11 +795,13 @@ struct kernel_statfs { #define LSS_BODY(type,args...) \ long __res; \ __asm__ __volatile__("push %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(4) \ "movl %2,%%ebx\n" \ "int $0x80\n" \ - "pop %%ebx" \ + "pop %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(-4) \ args \ - : "memory"); \ + : "esp", "memory"); \ LSS_RETURN(type,__res) #undef _syscall0 #define _syscall0(type,name) \ @@ -1388,7 +858,7 @@ struct kernel_statfs { : "i" (__NR_##name), "ri" ((long)(arg1)), \ "c" ((long)(arg2)), "d" ((long)(arg3)), \ "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "memory"); \ + : "esp", "memory"); \ LSS_RETURN(type,__res); \ } #undef _syscall6 @@ -1410,7 +880,7 @@ struct kernel_statfs { : "i" (__NR_##name), "0" ((long)(&__s)), \ "c" ((long)(arg2)), "d" ((long)(arg3)), \ "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "memory"); \ + : "esp", "memory"); \ LSS_RETURN(type,__res); \ } LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, @@ -1496,36 +966,10 @@ struct kernel_statfs { : "0"(-EINVAL), "i"(__NR_clone), "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) - : "memory", "ecx", "edx", "esi", "edi"); + : "esp", "memory", "ecx", "edx", "esi", "edi"); LSS_RETURN(int, __res); } - #define __NR__fadvise64_64 __NR_fadvise64_64 - LSS_INLINE _syscall6(int, _fadvise64_64, int, fd, - unsigned, offset_lo, unsigned, offset_hi, - unsigned, len_lo, unsigned, len_hi, - int, advice) - - LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, - loff_t len, int advice) { - return LSS_NAME(_fadvise64_64)(fd, - (unsigned)offset, (unsigned)(offset >>32), - (unsigned)len, (unsigned)(len >> 32), - advice); - } - - #define __NR__fallocate __NR_fallocate - LSS_INLINE _syscall6(int, _fallocate, int, fd, - int, mode, - unsigned, offset_lo, unsigned, offset_hi, - unsigned, len_lo, unsigned, len_hi) - - LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode, - loff_t offset, loff_t len) { - union { loff_t off; unsigned w[2]; } o = { offset }, l = { len }; - return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]); - } - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { /* On i386, the kernel does not know how to return from a signal * handler. 
Instead, it relies on user space to provide a @@ -1604,7 +1048,7 @@ struct kernel_statfs { __asm__ __volatile__("movq %5,%%r10; syscall" : \ "=a" (__res) : "0" (__NR_##name), \ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "g" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ + "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ LSS_RETURN(type, __res); \ } #undef _syscall5 @@ -1616,7 +1060,7 @@ struct kernel_statfs { __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ "=a" (__res) : "0" (__NR_##name), \ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "g" ((long)(arg4)), "g" ((long)(arg5)) : \ + "r" ((long)(arg4)), "r" ((long)(arg5)) : \ "r8", "r10", "r11", "rcx", "memory"); \ LSS_RETURN(type, __res); \ } @@ -1630,7 +1074,7 @@ struct kernel_statfs { "syscall" : \ "=a" (__res) : "0" (__NR_##name), \ "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "g" ((long)(arg4)), "g" ((long)(arg5)), "g" ((long)(arg6)) : \ + "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ "r8", "r9", "r10", "r11", "rcx", "memory"); \ LSS_RETURN(type, __res); \ } @@ -1639,8 +1083,6 @@ struct kernel_statfs { void *newtls, int *child_tidptr) { long __res; { - register void *__tls __asm__("r8") = newtls; - register int *__ctid __asm__("r10") = child_tidptr; __asm__ __volatile__(/* if (fn == NULL) * return -EINVAL; */ @@ -1653,8 +1095,10 @@ struct kernel_statfs { "testq %5,%5\n" "jz 1f\n" - /* childstack -= 2*sizeof(void *); + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 16; */ + "andq $-16,%5\n" "subq $16,%5\n" /* Push "arg" and "fn" onto the stack that will be @@ -1671,6 +1115,8 @@ struct kernel_statfs { * %r10 = child_tidptr) */ "movq %2,%%rax\n" + "movq %9,%%r8\n" + "movq %10,%%r10\n" "syscall\n" /* if (%rax != 0) @@ -1701,13 +1147,11 @@ struct kernel_statfs { : "=a" (__res) : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), - "d"(parent_tidptr), "r"(__tls), "r"(__ctid) - : "memory", "r11", "rcx"); + "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr) + : "rsp", "memory", "r8", "r10", "r11", "rcx"); } LSS_RETURN(int, __res); } - LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len, - int, advice) LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { /* On x86-64, the kernel does not know how to return from @@ -1735,12 +1179,26 @@ struct kernel_statfs { */ #undef LSS_REG #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + + /* r0..r3 are scratch registers and not preserved across function + * calls. We need to first evaluate the first 4 syscall arguments + * and store them on stack. They must be loaded into r0..r3 after + * all function calls to avoid r0..r3 being clobbered. + */ + #undef LSS_SAVE_ARG + #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a + #undef LSS_LOAD_ARG + #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r + #undef LSS_BODY - #define LSS_BODY(type,name,args...) \ + #define LSS_BODY(type, name, args...) 
\ register long __res_r0 __asm__("r0"); \ long __res; \ - __asm__ __volatile__ (__syscall(name) \ - : "=r"(__res_r0) : args : "lr", "memory"); \ + __SYS_REG(name) \ + __asm__ __volatile__ (__syscall_safe(name) \ + : "=r"(__res_r0) \ + : __SYS_REG_LIST(args) \ + : "lr", "memory"); \ __res = __res_r0; \ LSS_RETURN(type, __res) #undef _syscall0 @@ -1751,77 +1209,126 @@ struct kernel_statfs { #undef _syscall1 #define _syscall1(type, name, type1, arg1) \ type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + /* There is no need for using a volatile temp. */ \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__r0)); \ } #undef _syscall2 #define _syscall2(type, name, type1, arg1, type2, arg2) \ type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ } #undef _syscall3 #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ } #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ } #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ type5 arg5) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ "r"(__r4)); \ } #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ type5 arg5, type6 arg6) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_REG(5, arg6); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ "r"(__r4), "r"(__r5)); \ } LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, int flags, void *arg, int 
*parent_tidptr, void *newtls, int *child_tidptr) { - long __res; + register long __res __asm__("r5"); { - register int __flags __asm__("r0") = flags; - register void *__stack __asm__("r1") = child_stack; - register void *__ptid __asm__("r2") = parent_tidptr; - register void *__tls __asm__("r3") = newtls; - register int *__ctid __asm__("r4") = child_tidptr; - __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) - * return -EINVAL; - */ - "cmp %2,#0\n" - "cmpne %3,#0\n" - "moveq %0,%1\n" - "beq 1f\n" + if (fn == NULL || child_stack == NULL) { + __res = -EINVAL; + goto clone_exit; + } - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "str %5,[%3,#-4]!\n" - "str %2,[%3,#-4]!\n" + /* stash first 4 arguments on stack first because we can only load + * them after all function calls. + */ + int tmp_flags = flags; + int * tmp_stack = (int*) child_stack; + void * tmp_ptid = parent_tidptr; + void * tmp_tls = newtls; + + register int *__ctid __asm__("r4") = child_tidptr; - /* %r0 = syscall(%r0 = flags, + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + *(--tmp_stack) = (int) arg; + *(--tmp_stack) = (int) fn; + + /* We must load r0..r3 last after all possible function calls. */ + register int __flags __asm__("r0") = tmp_flags; + register void *__stack __asm__("r1") = tmp_stack; + register void *__ptid __asm__("r2") = tmp_ptid; + register void *__tls __asm__("r3") = tmp_tls; + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __SYS_REG(clone) + __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, * %r1 = child_stack, * %r2 = parent_tidptr, * %r3 = newtls, * %r4 = child_tidptr) */ + "push {r7}\n" + "mov r7,%1\n" __syscall(clone)"\n" /* if (%r0 != 0) @@ -1836,16 +1343,24 @@ struct kernel_statfs { "mov lr,pc\n" "ldr pc,[sp]\n" - /* Call _exit(%r0). + /* Call _exit(%r0), which never returns. We only + * need to set r7 for EABI syscall ABI but we do + * this always to simplify code sharing between + * old and new syscall ABIs. */ + "mov r7,%2\n" __syscall(exit)"\n" - "1:\n" + + /* Pop r7 from the stack only in the parent. 
+ */ + "1: pop {r7}\n" : "=r" (__res) - : "i"(-EINVAL), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + : "r"(__sysreg), + "i"(__NR_exit), "r"(__stack), "r"(__flags), "r"(__ptid), "r"(__tls), "r"(__ctid) - : "lr", "memory"); + : "cc", "lr", "memory"); } + clone_exit: LSS_RETURN(int, __res); } #elif defined(__mips__) @@ -2263,25 +1778,12 @@ struct kernel_statfs { #define __NR__exit __NR_exit #define __NR__gettid __NR_gettid #define __NR__mremap __NR_mremap - LSS_INLINE _syscall1(int, chdir, const char *,p) LSS_INLINE _syscall1(int, close, int, f) - LSS_INLINE _syscall2(int, clock_getres, int, c, - struct kernel_timespec*, t) - LSS_INLINE _syscall2(int, clock_gettime, int, c, - struct kernel_timespec*, t) - LSS_INLINE _syscall1(int, dup, int, f) - LSS_INLINE _syscall2(int, dup2, int, s, - int, d) - LSS_INLINE _syscall3(int, execve, const char*, f, - const char*const*,a,const char*const*, e) LSS_INLINE _syscall1(int, _exit, int, e) LSS_INLINE _syscall3(int, fcntl, int, f, int, c, long, a) - LSS_INLINE _syscall0(pid_t, fork) LSS_INLINE _syscall2(int, fstat, int, f, struct kernel_stat*, b) - LSS_INLINE _syscall2(int, fstatfs, int, f, - struct kernel_statfs*, b) LSS_INLINE _syscall4(int, futex, int*, a, int, o, int, v, struct kernel_timespec*, t) @@ -2291,147 +1793,53 @@ struct kernel_statfs { LSS_INLINE _syscall3(int, getdents64, int, f, struct kernel_dirent64*, d, int, c) #endif - LSS_INLINE _syscall0(gid_t, getegid) - LSS_INLINE _syscall0(uid_t, geteuid) - LSS_INLINE _syscall0(pid_t, getpgrp) LSS_INLINE _syscall0(pid_t, getpid) LSS_INLINE _syscall0(pid_t, getppid) - LSS_INLINE _syscall2(int, getpriority, int, a, - int, b) - LSS_INLINE _syscall2(int, getrlimit, int, r, - struct kernel_rlimit*, l) - LSS_INLINE _syscall1(pid_t, getsid, pid_t, p) LSS_INLINE _syscall0(pid_t, _gettid) - LSS_INLINE _syscall5(int, setxattr, const char *,p, - const char *, n, const void *,v, - size_t, s, int, f) - LSS_INLINE _syscall5(int, lsetxattr, const char *,p, - const char *, n, const void *,v, - size_t, s, int, f) - LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p, - const char *, n, void *, v, size_t, s) - LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p, - const char *, n, void *, v, size_t, s) - LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p, - char *, l, size_t, s) - LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p, - char *, l, size_t, s) - LSS_INLINE _syscall2(int, ioprio_get, int, which, - int, who) - LSS_INLINE _syscall3(int, ioprio_set, int, which, - int, who, int, ioprio) LSS_INLINE _syscall2(int, kill, pid_t, p, int, s) LSS_INLINE _syscall3(off_t, lseek, int, f, off_t, o, int, w) LSS_INLINE _syscall2(int, munmap, void*, s, size_t, l) - LSS_INLINE _syscall6(long, move_pages, pid_t, p, - unsigned long, n, void **,g, int *, d, - int *, s, int, f) LSS_INLINE _syscall5(void*, _mremap, void*, o, size_t, os, size_t, ns, unsigned long, f, void *, a) LSS_INLINE _syscall3(int, open, const char*, p, int, f, int, m) - LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u, - unsigned int, n, int, t) LSS_INLINE _syscall2(int, prctl, int, o, long, a) LSS_INLINE _syscall4(long, ptrace, int, r, pid_t, p, void *, a, void *, d) LSS_INLINE _syscall3(ssize_t, read, int, f, void *, b, size_t, c) - LSS_INLINE _syscall3(int, readlink, const char*, p, - char*, b, size_t, s) LSS_INLINE _syscall4(int, rt_sigaction, int, s, const struct kernel_sigaction*, a, struct kernel_sigaction*, o, size_t, c) - LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s, - size_t, c) LSS_INLINE 
_syscall4(int, rt_sigprocmask, int, h, const struct kernel_sigset_t*, s, struct kernel_sigset_t*, o, size_t, c); - LSS_INLINE _syscall2(int, rt_sigsuspend, - const struct kernel_sigset_t*, s, size_t, c); - LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p, - unsigned int, l, unsigned long *, m) - LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p, - unsigned int, l, unsigned long *, m) LSS_INLINE _syscall0(int, sched_yield) - LSS_INLINE _syscall1(long, set_tid_address, int *, t) - LSS_INLINE _syscall1(int, setfsgid, gid_t, g) - LSS_INLINE _syscall1(int, setfsuid, uid_t, u) - LSS_INLINE _syscall1(int, setuid, uid_t, u) - LSS_INLINE _syscall1(int, setgid, gid_t, g) - LSS_INLINE _syscall2(int, setpgid, pid_t, p, - pid_t, g) - LSS_INLINE _syscall3(int, setpriority, int, a, - int, b, int, p) - LSS_INLINE _syscall3(int, setresgid, gid_t, r, - gid_t, e, gid_t, s) - LSS_INLINE _syscall3(int, setresuid, uid_t, r, - uid_t, e, uid_t, s) - LSS_INLINE _syscall2(int, setrlimit, int, r, - const struct kernel_rlimit*, l) - LSS_INLINE _syscall0(pid_t, setsid) LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, const stack_t*, o) LSS_INLINE _syscall2(int, stat, const char*, f, struct kernel_stat*, b) - LSS_INLINE _syscall2(int, statfs, const char*, f, - struct kernel_statfs*, b) LSS_INLINE _syscall3(ssize_t, write, int, f, const void *, b, size_t, c) - LSS_INLINE _syscall3(ssize_t, writev, int, f, - const struct kernel_iovec*, v, size_t, c) #if defined(__NR_getcpu) LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, unsigned *, node, void *, unused); #endif #if defined(__x86_64__) || \ (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, recvmsg, int, s, - struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall3(int, sendmsg, int, s, - const struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall6(int, sendto, int, s, - const void*, m, size_t, l, - int, f, - const struct kernel_sockaddr*, a, int, t) - LSS_INLINE _syscall2(int, shutdown, int, s, - int, h) LSS_INLINE _syscall3(int, socket, int, d, int, t, int, p) - LSS_INLINE _syscall4(int, socketpair, int, d, - int, t, int, p, int*, s) #endif #if defined(__x86_64__) - LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode, - loff_t, offset, loff_t, len) LSS_INLINE _syscall6(void*, mmap, void*, s, size_t, l, int, p, int, f, int, d, __off64_t, o) - LSS_INLINE _syscall4(int, newfstatat, int, d, - const char *, p, - struct kernel_stat*, b, int, f) - - LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { - return LSS_NAME(setfsgid)(gid); - } - - LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { - return LSS_NAME(setfsuid)(uid); - } - - LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { - return LSS_NAME(setresgid)(rgid, egid, sgid); - } - - LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { - return LSS_NAME(setresuid)(ruid, euid, suid); - } LSS_INLINE int LSS_NAME(sigaction)(int signum, const struct kernel_sigaction *act, @@ -2453,117 +1861,35 @@ struct kernel_statfs { } } - LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { - return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); - } - LSS_INLINE int LSS_NAME(sigprocmask)(int how, const struct kernel_sigset_t *set, struct kernel_sigset_t *oldset) { return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); } - - LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { - return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); - } #endif #if defined(__x86_64__) || \ defined(__arm__) || \ 
(defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, int*, s, int, o, - struct kernel_rusage*, r) - + struct kernel_rusage*, r) LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ return LSS_NAME(wait4)(pid, status, options, 0); } - #endif - #if defined(__i386__) || defined(__x86_64__) + #endif + #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) - LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f) - #endif - #if defined(__i386__) || defined(__arm__) - #define __NR__getresgid32 __NR_getresgid32 - #define __NR__getresuid32 __NR_getresuid32 - #define __NR__setfsgid32 __NR_setfsgid32 - #define __NR__setfsuid32 __NR_setfsuid32 - #define __NR__setresgid32 __NR_setresgid32 - #define __NR__setresuid32 __NR_setresuid32 - LSS_INLINE _syscall2(int, ugetrlimit, int, r, - struct kernel_rlimit*, l) - LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f) - LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f) - LSS_INLINE _syscall3(int, _setresgid32, gid_t, r, - gid_t, e, gid_t, s) - LSS_INLINE _syscall3(int, _setresuid32, uid_t, r, - uid_t, e, uid_t, s) - - LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { - int rc; - if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)gid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setfsgid)(gid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { - int rc; - if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)uid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setfsuid)(uid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { - int rc; - if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)rgid & ~0xFFFFu || - (unsigned int)egid & ~0xFFFFu || - (unsigned int)sgid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setresgid)(rgid, egid, sgid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { - int rc; - if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)ruid & ~0xFFFFu || - (unsigned int)euid & ~0xFFFFu || - (unsigned int)suid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setresuid)(ruid, euid, suid); - } - } - return rc; - } #endif LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { memset(&set->sig, 0, sizeof(set->sig)); return 0; } - + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { memset(&set->sig, -1, sizeof(set->sig)); return 0; } - + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, int signum) { if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { @@ -2575,7 +1901,7 @@ struct kernel_statfs { return 0; } } - + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, int signum) { if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { @@ -2587,30 +1913,26 @@ struct kernel_statfs { return 0; } } - - LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] & - (1UL << ((signum - 1) % (8*sizeof(set->sig[0]))))); - } - } + #if defined(__i386__) || \ defined(__arm__) || \ (defined(__mips__) && _MIPS_SIM == 
_MIPS_SIM_ABI32) || defined(__PPC__) #define __NR__sigaction __NR_sigaction - #define __NR__sigpending __NR_sigpending #define __NR__sigprocmask __NR_sigprocmask - #define __NR__sigsuspend __NR_sigsuspend LSS_INLINE _syscall2(int, fstat64, int, f, struct kernel_stat64 *, b) LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, loff_t *, res, uint, wh) +#ifdef __PPC64__ + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#else + #ifndef __ARM_EABI__ + /* Not available on ARM EABI Linux. */ LSS_INLINE _syscall1(void*, mmap, void*, a) -#ifndef __PPC64__ + #endif LSS_INLINE _syscall6(void*, mmap2, void*, s, size_t, l, int, p, int, f, int, d, @@ -2619,17 +1941,9 @@ struct kernel_statfs { LSS_INLINE _syscall3(int, _sigaction, int, s, const struct kernel_old_sigaction*, a, struct kernel_old_sigaction*, o) - LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s) LSS_INLINE _syscall3(int, _sigprocmask, int, h, const unsigned long*, s, unsigned long*, o) - #ifdef __PPC__ - LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s) - #else - LSS_INLINE _syscall3(int, _sigsuspend, const void*, a, - int, b, - unsigned long, s) - #endif LSS_INLINE _syscall2(int, stat64, const char *, p, struct kernel_stat64 *, b) @@ -2695,17 +2009,6 @@ struct kernel_statfs { return rc; } - LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { - int old_errno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = old_errno; - LSS_NAME(sigemptyset)(set); - rc = LSS_NAME(_sigpending)(&set->sig[0]); - } - return rc; - } - LSS_INLINE int LSS_NAME(sigprocmask)(int how, const struct kernel_sigset_t *set, struct kernel_sigset_t *oldset) { @@ -2722,20 +2025,6 @@ struct kernel_statfs { } return rc; } - - LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = olderrno; - rc = LSS_NAME(_sigsuspend)( - #ifndef __PPC__ - set, 0, - #endif - set->sig[0]); - } - return rc; - } #endif #if defined(__PPC__) #undef LSS_SC_LOADARGS_0 @@ -2792,93 +2081,20 @@ struct kernel_statfs { } \ LSS_RETURN(type, __sc_ret, __sc_err) - LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, - int flags){ - LSS_SC_BODY(3, ssize_t, 17, s, msg, flags); - } - - LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, - const struct kernel_msghdr *msg, - int flags) { - LSS_SC_BODY(3, ssize_t, 16, s, msg, flags); - } - - // TODO(csilvers): why is this ifdef'ed out? -#if 0 - LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, - int flags, - const struct kernel_sockaddr *to, - unsigned int tolen) { - LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen); - } -#endif - - LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { - LSS_SC_BODY(2, int, 13, s, how); - } - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { LSS_SC_BODY(3, int, 1, domain, type, protocol); } - - LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, - int sv[2]) { - LSS_SC_BODY(4, int, 8, d, type, protocol, sv); - } #endif #if defined(__i386__) || \ - defined(__arm__) || \ + (defined(__arm__) && !defined(__ARM_EABI__)) || \ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + /* See sys_socketcall in net/socket.c in kernel source. * It de-multiplexes on its first arg and unpacks the arglist * array in its second arg. 
*/ LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a) - LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, - int flags){ - unsigned long args[3] = { - (unsigned long) s, - (unsigned long) msg, - (unsigned long) flags - }; - return (ssize_t) LSS_NAME(socketcall)(17, args); - } - - LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, - const struct kernel_msghdr *msg, - int flags) { - unsigned long args[3] = { - (unsigned long) s, - (unsigned long) msg, - (unsigned long) flags - }; - return (ssize_t) LSS_NAME(socketcall)(16, args); - } - - LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, - int flags, - const struct kernel_sockaddr *to, - unsigned int tolen) { - unsigned long args[6] = { - (unsigned long) s, - (unsigned long) buf, - (unsigned long) len, - (unsigned long) flags, - (unsigned long) to, - (unsigned long) tolen - }; - return (ssize_t) LSS_NAME(socketcall)(11, args); - } - - LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { - unsigned long args[2] = { - (unsigned long) s, - (unsigned long) how - }; - return LSS_NAME(socketcall)(13, args); - } - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { unsigned long args[3] = { (unsigned long) domain, @@ -2887,22 +2103,9 @@ struct kernel_statfs { }; return LSS_NAME(socketcall)(1, args); } - - LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, - int sv[2]) { - unsigned long args[4] = { - (unsigned long) d, - (unsigned long) type, - (unsigned long) protocol, - (unsigned long) sv - }; - return LSS_NAME(socketcall)(8, args); - } - #endif - #if defined(__i386__) || defined(__PPC__) - LSS_INLINE _syscall4(int, fstatat64, int, d, - const char *, p, - struct kernel_stat64 *, b, int, f) + #elif defined(__ARM_EABI__) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) #endif #if defined(__i386__) || defined(__PPC__) || \ (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) @@ -2934,29 +2137,6 @@ struct kernel_statfs { #else LSS_INLINE _syscall1(int, pipe, int *, p) #endif - /* TODO(csilvers): see if ppc can/should support this as well */ - #if defined(__i386__) || \ - defined(__arm__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) - #define __NR__statfs64 __NR_statfs64 - #define __NR__fstatfs64 __NR_fstatfs64 - LSS_INLINE _syscall3(int, _statfs64, const char*, p, - size_t, s,struct kernel_statfs64*, b) - LSS_INLINE _syscall3(int, _fstatfs64, int, f, - size_t, s,struct kernel_statfs64*, b) - LSS_INLINE int LSS_NAME(statfs64)(const char *p, - struct kernel_statfs64 *b) { - return LSS_NAME(_statfs64)(p, sizeof(*b), b); - } - LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) { - return LSS_NAME(_fstatfs64)(f, sizeof(*b), b); - } - #endif - - LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) { - extern char **environ; - return LSS_NAME(execve)(path, argv, (const char *const *)environ); - } LSS_INLINE pid_t LSS_NAME(gettid)() { pid_t tid = LSS_NAME(_gettid)(); @@ -2993,72 +2173,6 @@ struct kernel_statfs { LSS_ERRNO = err; return rc; } - - LSS_INLINE int LSS_NAME(raise)(int sig) { - return LSS_NAME(kill)(LSS_NAME(getpid)(), sig); - } - - LSS_INLINE int LSS_NAME(setpgrp)() { - return LSS_NAME(setpgid)(0, 0); - } - - LSS_INLINE int LSS_NAME(sysconf)(int name) { - extern int __getpagesize(void); - switch (name) { - case _SC_OPEN_MAX: { - struct kernel_rlimit limit; - return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0 - ? 
8192 : limit.rlim_cur;
-      }
-      case _SC_PAGESIZE:
-        return __getpagesize();
-      default:
-        errno = ENOSYS;
-        return -1;
-    }
-  }
-  #if defined(__x86_64__) || \
-      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
-  LSS_INLINE _syscall4(ssize_t, pread64, int, f,
-                       void *, b, size_t, c,
-                       loff_t, o)
-  LSS_INLINE _syscall4(ssize_t, pwrite64, int, f,
-                       const void *, b, size_t, c,
-                       loff_t, o)
-  LSS_INLINE _syscall3(int, readahead, int, f,
-                       loff_t, o, unsigned, c)
-  #else
-  #define __NR__pread64   __NR_pread64
-  #define __NR__pwrite64  __NR_pwrite64
-  #define __NR__readahead __NR_readahead
-  LSS_INLINE _syscall5(ssize_t, _pread64, int, f,
-                       void *, b, size_t, c, unsigned, o1,
-                       unsigned, o2)
-  LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f,
-                       const void *, b, size_t, c, unsigned, o1,
-                       long, o2)
-  LSS_INLINE _syscall4(int, _readahead, int, f,
-                       unsigned, o1, unsigned, o2, size_t, c);
-  /* We force 64bit-wide parameters onto the stack, then access each
-   * 32-bit component individually.  This guarantees that we build the
-   * correct parameters independent of the native byte-order of the
-   * underlying architecture.
-   */
-  LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
-                                       loff_t off) {
-    union { loff_t off; unsigned arg[2]; } o = { off };
-    return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
-  }
-  LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
-                                        size_t count, loff_t off) {
-    union { loff_t off; unsigned arg[2]; } o = { off };
-    return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
-  }
-  LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
-    union { loff_t off; unsigned arg[2]; } o = { off };
-    return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
-  }
-  #endif
 #endif
 
 #if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
 
diff --git a/src/base/stl_allocator.h b/src/base/stl_allocator.h
index 3152cf9..8276a83 100644
--- a/src/base/stl_allocator.h
+++ b/src/base/stl_allocator.h
@@ -87,6 +87,7 @@ class STL_Allocator {
   size_type max_size() const { return size_t(-1) / sizeof(T); }
 
   void construct(pointer p, const T& val) { ::new(p) T(val); }
+  void construct(pointer p) { ::new(p) T(); }
   void destroy(pointer p) { p->~T(); }
 
   // There's no state, so these allocators are always equal
 
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index 5396743..2559158 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -86,12 +86,20 @@
 // time, so prefer making the syscalls directly if we can.
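
The sysinfo.cc hunk below splits the safeopen/saferead/safeclose macros so
that each one independently falls back to its libc wrapper when the
corresponding syscall number is missing (Solaris 11, at least sometimes,
only defines SYS_openat, not SYS_open).  A standalone sketch of the same
fallback pattern, assuming a Unix-like platform with <sys/syscall.h>; the
helper name safe_open is ours, not the patch's:

  #include <fcntl.h>        // open(), O_RDONLY
  #include <unistd.h>       // read(), close(), syscall()
  #include <sys/syscall.h>  // SYS_open, if this platform defines it

  // Prefer the raw syscall when its number is defined; otherwise fall
  // back to the libc wrapper, mirroring the per-symbol #ifdefs below.
  #ifdef SYS_open
  static int safe_open(const char* path, int flags) {
    return syscall(SYS_open, path, flags);
  }
  #else
  static int safe_open(const char* path, int flags) {
    return open(path, flags);
  }
  #endif

  int main() {
    int fd = safe_open("/proc/self/maps", O_RDONLY);
    if (fd < 0) return 1;
    char buf[512];
    ssize_t n = read(fd, buf, sizeof(buf));  // the real code wraps read/close too
    close(fd);
    return n < 0;
  }
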
#ifdef HAVE_SYS_SYSCALL_H # include <sys/syscall.h> +#endif +#ifdef SYS_open // solaris 11, at least sometimes, only defines SYS_openat # define safeopen(filename, mode) syscall(SYS_open, filename, mode) -# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size) -# define safeclose(fd) syscall(SYS_close, fd) #else # define safeopen(filename, mode) open(filename, mode) +#endif +#ifdef SYS_read +# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size) +#else # define saferead(fd, buffer, size) read(fd, buffer, size) +#endif +#ifdef SYS_close +# define safeclose(fd) syscall(SYS_close, fd) +#else # define safeclose(fd) close(fd) #endif diff --git a/src/debugallocation.cc b/src/debugallocation.cc index d65b13e..8ecb351 100644 --- a/src/debugallocation.cc +++ b/src/debugallocation.cc @@ -742,7 +742,7 @@ class MallocBlock { return FromRawPointer(const_cast<void*>(p)); } - void Check(int type) { + void Check(int type) const { alloc_map_lock_.Lock(); CheckLocked(type); alloc_map_lock_.Unlock(); @@ -1015,17 +1015,17 @@ class DebugMallocImplementation : public TCMallocImplementation { return result; } - virtual bool VerifyNewMemory(void* p) { + virtual bool VerifyNewMemory(const void* p) { if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); return true; } - virtual bool VerifyArrayNewMemory(void* p) { + virtual bool VerifyArrayNewMemory(const void* p) { if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); return true; } - virtual bool VerifyMallocMemory(void* p) { + virtual bool VerifyMallocMemory(const void* p) { if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); return true; } @@ -1043,7 +1043,7 @@ class DebugMallocImplementation : public TCMallocImplementation { return size; } - virtual size_t GetAllocatedSize(void* p) { + virtual size_t GetAllocatedSize(const void* p) { if (p) { RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned, "ptr not allocated by tcmalloc"); diff --git a/src/google/malloc_extension.h b/src/google/malloc_extension.h index 4b06b2d..5bee019 100644 --- a/src/google/malloc_extension.h +++ b/src/google/malloc_extension.h @@ -100,10 +100,9 @@ class PERFTOOLS_DLL_DECL MallocExtension { // See "verify_memory.h" to see what these routines do virtual bool VerifyAllMemory(); - // TODO(csilvers): change these to const void*. - virtual bool VerifyNewMemory(void* p); - virtual bool VerifyArrayNewMemory(void* p); - virtual bool VerifyMallocMemory(void* p); + virtual bool VerifyNewMemory(const void* p); + virtual bool VerifyArrayNewMemory(const void* p); + virtual bool VerifyMallocMemory(const void* p); virtual bool MallocMemoryStats(int* blocks, size_t* total, int histogram[kMallocHistogramSize]); @@ -281,8 +280,7 @@ class PERFTOOLS_DLL_DECL MallocExtension { // will return 0.) // This is equivalent to malloc_size() in OS X, malloc_usable_size() // in glibc, and _msize() for windows. - // TODO(csilvers): change to const void*. 
- virtual size_t GetAllocatedSize(void* p); + virtual size_t GetAllocatedSize(const void* p); // Returns kOwned if this malloc implementation allocated the memory // pointed to by p, or kNotOwned if some other malloc implementation diff --git a/src/google/malloc_extension_c.h b/src/google/malloc_extension_c.h index e3f7f79..72a0a7c 100644 --- a/src/google/malloc_extension_c.h +++ b/src/google/malloc_extension_c.h @@ -58,9 +58,9 @@ extern "C" { #define kMallocExtensionHistogramSize 64 PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void); -PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(void* p); -PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(void* p); -PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p); PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total, int histogram[kMallocExtensionHistogramSize]); PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length); @@ -78,7 +78,7 @@ PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes); PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); -PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p); /* * NOTE: These enum values MUST be kept in sync with the version in diff --git a/src/heap-checker.cc b/src/heap-checker.cc index 1c1cf30..5cb582d 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -784,6 +784,8 @@ static void MakeDisabledLiveCallbackLocked( } } +static const char kUnnamedProcSelfMapEntry[] = "UNNAMED"; + // This function takes some fields from a /proc/self/maps line: // // start_address start address of a memory region. @@ -801,7 +803,9 @@ static void RecordGlobalDataLocked(uintptr_t start_address, RAW_DCHECK(heap_checker_lock.IsHeld(), ""); // Ignore non-writeable regions. if (strchr(permissions, 'w') == NULL) return; - if (filename == NULL || *filename == '\0') filename = "UNNAMED"; + if (filename == NULL || *filename == '\0') { + filename = kUnnamedProcSelfMapEntry; + } RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, filename, start_address, end_address); (*library_live_objects)[filename]. @@ -1405,6 +1409,31 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); } } if (size < sizeof(void*)) continue; + +#ifdef NO_FRAME_POINTER + // Frame pointer omission requires us to use libunwind, which uses direct + // mmap and munmap system calls, and that needs special handling. + if (name2 == kUnnamedProcSelfMapEntry) { + static const uintptr_t page_mask = ~(getpagesize() - 1); + const uintptr_t addr = reinterpret_cast<uintptr_t>(object); + if ((addr & page_mask) == 0 && (size & page_mask) == 0) { + // This is an object we slurped from /proc/self/maps. + // It may or may not be readable at this point. + // + // In case all the above conditions made a mistake, and the object is + // not related to libunwind, we also verify that it's not readable + // before ignoring it. + if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) { + // Skip unreadable object, so we don't crash trying to sweep it. 
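+ // (msync with MS_ASYNC serves as a cheap readability probe here: it
+ // fails, typically with ENOMEM, on ranges that are no longer mapped,
+ // and it does not block or force any writeback, so a zero return
+ // tells us the region is safe to scan.)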
+ RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) " + "(msync error %d (%s))", + object, object + size, errno, strerror(errno)); + continue; + } + } + } +#endif + const char* const max_object = object + size - sizeof(void*); while (object <= max_object) { // potentially unaligned load: @@ -1986,6 +2015,16 @@ void HeapLeakChecker_InternalInitStart() { RAW_LOG(FATAL, "Unsupported heap_check flag: %s", FLAGS_heap_check.c_str()); } + // FreeBSD doesn't seem to honor atexit execution order: + // http://code.google.com/p/gperftools/issues/detail?id=375 + // Since heap-checking before destructors depends on atexit running + // at the right time, on FreeBSD we always check after, even in the + // less strict modes. This just means FreeBSD is always a bit + // stricter in its checking than other OSes. +#ifdef __FreeBSD__ + FLAGS_heap_check_after_destructors = true; +#endif + { SpinLockHolder l(&heap_checker_lock); RAW_DCHECK(heap_checker_pid == getpid(), ""); heap_checker_on = true; diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc index e85b16e..1d79924 100644 --- a/src/heap-profile-table.cc +++ b/src/heap-profile-table.cc @@ -63,6 +63,7 @@ #include "symbolize.h" #include <google/stacktrace.h> #include <google/malloc_hook.h> +#include "memory_region_map.h" #include "base/commandlineflags.h" #include "base/logging.h" // for the RawFD I/O commands #include "base/sysinfo.h" @@ -98,7 +99,8 @@ const char HeapProfileTable::kFileExt[] = ".heap"; //---------------------------------------------------------------------- -static const int kHashTableSize = 179999; // Size for table_. +// Size for alloc_table_ and mmap_table_. +static const int kHashTableSize = 179999; /*static*/ const int HeapProfileTable::kMaxStackDepth; //---------------------------------------------------------------------- @@ -122,38 +124,60 @@ static bool ByAllocatedSpace(HeapProfileTable::Stats* a, HeapProfileTable::HeapProfileTable(Allocator alloc, DeAllocator dealloc) : alloc_(alloc), dealloc_(dealloc) { - // Make the table - const int table_bytes = kHashTableSize * sizeof(*table_); - table_ = reinterpret_cast<Bucket**>(alloc_(table_bytes)); - memset(table_, 0, table_bytes); - // Make allocation map - allocation_ = - new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_); - // init the rest: + // Initialize the overall profile stats. memset(&total_, 0, sizeof(total_)); - num_buckets_ = 0; + + // Make the malloc table. + const int alloc_table_bytes = kHashTableSize * sizeof(*alloc_table_); + alloc_table_ = reinterpret_cast<Bucket**>(alloc_(alloc_table_bytes)); + memset(alloc_table_, 0, alloc_table_bytes); + num_alloc_buckets_ = 0; + + // Initialize the mmap table. + mmap_table_ = NULL; + num_available_mmap_buckets_ = 0; + + // Make malloc and mmap allocation maps. 
+ alloc_address_map_ = + new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_); + mmap_address_map_ = NULL; } HeapProfileTable::~HeapProfileTable() { - // free allocation map - allocation_->~AllocationMap(); - dealloc_(allocation_); - allocation_ = NULL; - // free hash table - for (int b = 0; b < kHashTableSize; b++) { - for (Bucket* x = table_[b]; x != 0; /**/) { - Bucket* b = x; - x = x->next; - dealloc_(b->stack); - dealloc_(b); + DeallocateBucketTable(alloc_table_); + alloc_table_ = NULL; + DeallocateBucketTable(mmap_table_); + mmap_table_ = NULL; + DeallocateAllocationMap(alloc_address_map_); + alloc_address_map_ = NULL; + DeallocateAllocationMap(mmap_address_map_); + mmap_address_map_ = NULL; +} + +void HeapProfileTable::DeallocateAllocationMap(AllocationMap* allocation) { + if (allocation != NULL) { + allocation->~AllocationMap(); + dealloc_(allocation); + } +} + +void HeapProfileTable::DeallocateBucketTable(Bucket** table) { + if (table != NULL) { + for (int b = 0; b < kHashTableSize; b++) { + for (Bucket* x = table[b]; x != 0; /**/) { + Bucket* b = x; + x = x->next; + dealloc_(b->stack); + dealloc_(b); + } } + dealloc_(table); } - dealloc_(table_); - table_ = NULL; } -HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth, - const void* const key[]) { +HeapProfileTable::Bucket* HeapProfileTable::GetBucket( + int depth, const void* const key[], Bucket** table, + int* bucket_count) { // Make hash-value uintptr_t h = 0; for (int i = 0; i < depth; i++) { @@ -166,7 +190,7 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth, // Lookup stack trace in table unsigned int buck = ((unsigned int) h) % kHashTableSize; - for (Bucket* b = table_[buck]; b != 0; b = b->next) { + for (Bucket* b = table[buck]; b != 0; b = b->next) { if ((b->hash == h) && (b->depth == depth) && equal(key, key + depth, b->stack)) { @@ -183,9 +207,11 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth, b->hash = h; b->depth = depth; b->stack = kcopy; - b->next = table_[buck]; - table_[buck] = b; - num_buckets_++; + b->next = table[buck]; + table[buck] = b; + if (bucket_count != NULL) { + ++(*bucket_count); + } return b; } @@ -198,7 +224,8 @@ int HeapProfileTable::GetCallerStackTrace( void HeapProfileTable::RecordAlloc( const void* ptr, size_t bytes, int stack_depth, const void* const call_stack[]) { - Bucket* b = GetBucket(stack_depth, call_stack); + Bucket* b = GetBucket(stack_depth, call_stack, alloc_table_, + &num_alloc_buckets_); b->allocs++; b->alloc_size += bytes; total_.allocs++; @@ -207,12 +234,12 @@ void HeapProfileTable::RecordAlloc( AllocValue v; v.set_bucket(b); // also did set_live(false); set_ignore(false) v.bytes = bytes; - allocation_->Insert(ptr, v); + alloc_address_map_->Insert(ptr, v); } void HeapProfileTable::RecordFree(const void* ptr) { AllocValue v; - if (allocation_->FindAndRemove(ptr, &v)) { + if (alloc_address_map_->FindAndRemove(ptr, &v)) { Bucket* b = v.bucket(); b->frees++; b->free_size += v.bytes; @@ -222,14 +249,14 @@ void HeapProfileTable::RecordFree(const void* ptr) { } bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const { - const AllocValue* alloc_value = allocation_->Find(ptr); + const AllocValue* alloc_value = alloc_address_map_->Find(ptr); if (alloc_value != NULL) *object_size = alloc_value->bytes; return alloc_value != NULL; } bool HeapProfileTable::FindAllocDetails(const void* ptr, AllocInfo* info) const { - const AllocValue* alloc_value = allocation_->Find(ptr); + const AllocValue* alloc_value =
alloc_address_map_->Find(ptr); if (alloc_value != NULL) { info->object_size = alloc_value->bytes; info->call_stack = alloc_value->bucket()->stack; @@ -243,13 +270,13 @@ bool HeapProfileTable::FindInsideAlloc(const void* ptr, const void** object_ptr, size_t* object_size) const { const AllocValue* alloc_value = - allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr); + alloc_address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr); if (alloc_value != NULL) *object_size = alloc_value->bytes; return alloc_value != NULL; } bool HeapProfileTable::MarkAsLive(const void* ptr) { - AllocValue* alloc = allocation_->FindMutable(ptr); + AllocValue* alloc = alloc_address_map_->FindMutable(ptr); if (alloc && !alloc->live()) { alloc->set_live(true); return true; @@ -258,7 +285,7 @@ bool HeapProfileTable::MarkAsLive(const void* ptr) { } void HeapProfileTable::MarkAsIgnored(const void* ptr) { - AllocValue* alloc = allocation_->FindMutable(ptr); + AllocValue* alloc = alloc_address_map_->FindMutable(ptr); if (alloc) { alloc->set_ignore(true); } @@ -299,27 +326,81 @@ int HeapProfileTable::UnparseBucket(const Bucket& b, HeapProfileTable::Bucket** HeapProfileTable::MakeSortedBucketList() const { - Bucket** list = - reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); + Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * + (num_alloc_buckets_ + num_available_mmap_buckets_))); + + RAW_DCHECK(mmap_table_ != NULL || num_available_mmap_buckets_ == 0, ""); int n = 0; + for (int b = 0; b < kHashTableSize; b++) { - for (Bucket* x = table_[b]; x != 0; x = x->next) { + for (Bucket* x = alloc_table_[b]; x != 0; x = x->next) { list[n++] = x; } } - RAW_DCHECK(n == num_buckets_, ""); + RAW_DCHECK(n == num_alloc_buckets_, ""); + + if (mmap_table_ != NULL) { + for (int b = 0; b < kHashTableSize; b++) { + for (Bucket* x = mmap_table_[b]; x != 0; x = x->next) { + list[n++] = x; + } + } + } + RAW_DCHECK(n == num_alloc_buckets_ + num_available_mmap_buckets_, ""); - sort(list, list + num_buckets_, ByAllocatedSpace); + sort(list, list + num_alloc_buckets_ + num_available_mmap_buckets_, + ByAllocatedSpace); return list; } +void HeapProfileTable::RefreshMMapData() { + // Make the table + static const int mmap_table_bytes = kHashTableSize * sizeof(*mmap_table_); + if (mmap_table_ == NULL) { + mmap_table_ = reinterpret_cast<Bucket**>(alloc_(mmap_table_bytes)); + memset(mmap_table_, 0, mmap_table_bytes); + } + num_available_mmap_buckets_ = 0; + + ClearMMapData(); + mmap_address_map_ = + new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_); + + MemoryRegionMap::LockHolder l; + for (MemoryRegionMap::RegionIterator r = + MemoryRegionMap::BeginRegionLocked(); + r != MemoryRegionMap::EndRegionLocked(); ++r) { + Bucket* b = + GetBucket(r->call_stack_depth, r->call_stack, mmap_table_, NULL); + if (b->alloc_size == 0) { + num_available_mmap_buckets_ += 1; + } + b->allocs += 1; + b->alloc_size += r->end_addr - r->start_addr; + + AllocValue v; + v.set_bucket(b); + v.bytes = r->end_addr - r->start_addr; + mmap_address_map_->Insert(reinterpret_cast<const void*>(r->start_addr), v); + } +} + +void HeapProfileTable::ClearMMapData() { + if (mmap_address_map_ != NULL) { + mmap_address_map_->Iterate(ZeroBucketCountsIterator, this); + mmap_address_map_->~AllocationMap(); + dealloc_(mmap_address_map_); + mmap_address_map_ = NULL; + } +} + void HeapProfileTable::IterateOrderedAllocContexts( AllocContextIterator callback) const { Bucket** list = MakeSortedBucketList(); AllocContextInfo info; 
- for (int i = 0; i < num_buckets_; ++i) { + for (int i = 0; i < num_alloc_buckets_; ++i) { *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]); info.stack_depth = list[i]->depth; info.call_stack = list[i]->stack; @@ -351,9 +432,14 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { memset(&stats, 0, sizeof(stats)); int bucket_length = snprintf(buf, size, "%s", kProfileHeader); if (bucket_length < 0 || bucket_length >= size) return 0; - bucket_length = UnparseBucket(total_, buf, bucket_length, size, + Bucket total_with_mmap(total_); + if (mmap_table_ != NULL) { + total_with_mmap.alloc_size += MemoryRegionMap::MapSize(); + total_with_mmap.free_size += MemoryRegionMap::UnmapSize(); + } + bucket_length = UnparseBucket(total_with_mmap, buf, bucket_length, size, " heapprofile", &stats); - for (int i = 0; i < num_buckets_; i++) { + for (int i = 0; i < num_alloc_buckets_; i++) { bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "", &stats); } @@ -388,6 +474,17 @@ void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v, RawWrite(args.fd, buf, len); } +inline void HeapProfileTable::ZeroBucketCountsIterator( + const void* ptr, AllocValue* v, HeapProfileTable* heap_profile) { + Bucket* b = v->bucket(); + if (b != NULL) { + b->allocs = 0; + b->alloc_size = 0; + b->free_size = 0; + b->frees = 0; + } +} + // Callback from NonLiveSnapshot; adds entry to arg->dest // if the entry is not live and is not present in arg->base. void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v, @@ -452,7 +549,7 @@ void HeapProfileTable::CleanupOldProfiles(const char* prefix) { HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() { Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_); - allocation_->Iterate(AddToSnapshot, s); + alloc_address_map_->Iterate(AddToSnapshot, s); return s; } @@ -477,7 +574,7 @@ HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot( AddNonLiveArgs args; args.dest = s; args.base = base; - allocation_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args); + alloc_address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args); RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n", int(s->total_.allocs - s->total_.frees), int(s->total_.alloc_size - s->total_.free_size)); diff --git a/src/heap-profile-table.h b/src/heap-profile-table.h index 8ab1164..abd3184 100644 --- a/src/heap-profile-table.h +++ b/src/heap-profile-table.h @@ -138,7 +138,8 @@ class HeapProfileTable { // are skipped in heap checking reports. void MarkAsIgnored(const void* ptr); - // Return current total (de)allocation statistics. + // Return current total (de)allocation statistics. It does not include + // mmap'ed regions. const Stats& total() const { return total_; } // Allocation data iteration callback: gets passed object pointer and @@ -148,7 +149,7 @@ class HeapProfileTable { // Iterate over the allocation profile data calling "callback" // for every allocation. void IterateAllocs(AllocIterator callback) const { - allocation_->Iterate(MapArgsAllocIterator, callback); + alloc_address_map_->Iterate(MapArgsAllocIterator, callback); } // Allocation context profile data iteration callback @@ -186,6 +187,16 @@ class HeapProfileTable { // Caller must call ReleaseSnapshot() on result when no longer needed. Snapshot* NonLiveSnapshot(Snapshot* base); + // Refresh the internal mmap information from MemoryRegionMap.
Results of + FillOrderedProfile and IterateOrderedAllocContexts will contain mmap'ed + memory regions as of the call to RefreshMMapData. + void RefreshMMapData(); + + // Clear the internal mmap information. Results of FillOrderedProfile and + // IterateOrderedAllocContexts won't contain mmap'ed memory regions after + // calling ClearMMapData. + void ClearMMapData(); + private: // data types ---------------------------- @@ -263,9 +274,18 @@ class HeapProfileTable { const char* extra, Stats* profile_stats); - // Get the bucket for the caller stack trace 'key' of depth 'depth' - // creating the bucket if needed. - Bucket* GetBucket(int depth, const void* const key[]); + // Deallocate a given allocation map. + void DeallocateAllocationMap(AllocationMap* allocation); + + // Deallocate a given bucket table. + void DeallocateBucketTable(Bucket** table); + + // Get the bucket for the caller stack trace 'key' of depth 'depth' from a + // bucket hash map 'table', creating the bucket if needed. '*bucket_count' + // is incremented when 'bucket_count' is not NULL and a new + // bucket object is created. + Bucket* GetBucket(int depth, const void* const key[], Bucket** table, + int* bucket_count); // Helper for IterateAllocs to do callback signature conversion // from AllocationMap::Iterate to AllocIterator. @@ -285,9 +305,14 @@ class HeapProfileTable { inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, const DumpArgs& args); + // Helper for zeroing out the size counts in buckets. + inline static void ZeroBucketCountsIterator( + const void* ptr, AllocValue* v, HeapProfileTable* heap_profile); + // Helper for IterateOrderedAllocContexts and FillOrderedProfile. - // Creates a sorted list of Buckets whose length is num_buckets_. - // The caller is responsible for dellocating the returned list. + // Creates a sorted list of Buckets whose length is num_alloc_buckets_ + + // num_available_mmap_buckets_. + // The caller is responsible for deallocating the returned list. Bucket** MakeSortedBucketList() const; // Helper for TakeSnapshot. Saves object to snapshot. @@ -319,17 +344,25 @@ class HeapProfileTable { // Overall profile stats; we use only the Stats part, // but make it a Bucket to pass to UnparseBucket. + // It does not include mmap'ed regions. Bucket total_; - // Bucket hash table. + // Bucket hash table for malloc. // We hand-craft one instead of using one of the pre-written // ones because we do not want to use malloc when operating on the table. // It is only a few lines of code, so no big deal. - Bucket** table_; - int num_buckets_; - - // Map of all currently allocated objects we know about. - AllocationMap* allocation_; + Bucket** alloc_table_; + int num_alloc_buckets_; + + // Bucket hash table for mmap. + // This table is filled with the information from MemoryRegionMap by calling + // RefreshMMapData. + Bucket** mmap_table_; + int num_available_mmap_buckets_; + + // Map of all currently allocated objects and mapped regions we know about.
+ AllocationMap* alloc_address_map_; + AllocationMap* mmap_address_map_; DISALLOW_COPY_AND_ASSIGN(HeapProfileTable); }; diff --git a/src/heap-profiler.cc b/src/heap-profiler.cc index 32815b4..68b379d 100644 --- a/src/heap-profiler.cc +++ b/src/heap-profiler.cc @@ -175,29 +175,6 @@ static HeapProfileTable* heap_profile = NULL; // the heap profile table // Profile generation //---------------------------------------------------------------------- -enum AddOrRemove { ADD, REMOVE }; - -// Add or remove all MMap-allocated regions to/from *heap_profile. -// Assumes heap_lock is held. -static void AddRemoveMMapDataLocked(AddOrRemove mode) { - RAW_DCHECK(heap_lock.IsHeld(), ""); - if (!FLAGS_mmap_profile || !is_on) return; - // MemoryRegionMap maintained all the data we need for all - // mmap-like allocations, so we just use it here: - MemoryRegionMap::LockHolder l; - for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked(); - r != MemoryRegionMap::EndRegionLocked(); ++r) { - if (mode == ADD) { - heap_profile->RecordAlloc( - reinterpret_cast<const void*>(r->start_addr), - r->end_addr - r->start_addr, - r->call_stack_depth, r->call_stack); - } else { - heap_profile->RecordFree(reinterpret_cast<void*>(r->start_addr)); - } - } -} - // Input must be a buffer of size at least 1MB. static char* DoGetHeapProfileLocked(char* buf, int buflen) { // We used to be smarter about estimating the required memory and @@ -208,16 +185,13 @@ static char* DoGetHeapProfileLocked(char* buf, int buflen) { RAW_DCHECK(heap_lock.IsHeld(), ""); int bytes_written = 0; if (is_on) { - HeapProfileTable::Stats const stats = heap_profile->total(); - (void)stats; // avoid an unused-variable warning in non-debug mode. - AddRemoveMMapDataLocked(ADD); + if (FLAGS_mmap_profile) { + heap_profile->RefreshMMapData(); + } bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1); - // FillOrderedProfile should not reduce the set of active mmap-ed regions, - // hence MemoryRegionMap will let us remove everything we've added above: - AddRemoveMMapDataLocked(REMOVE); - RAW_DCHECK(stats.Equivalent(heap_profile->total()), ""); - // if this fails, we somehow removed by AddRemoveMMapDataLocked - // more than we have added. 
+ if (FLAGS_mmap_profile) { + heap_profile->ClearMMapData(); + } } buf[bytes_written] = '\0'; RAW_DCHECK(bytes_written == strlen(buf), ""); diff --git a/src/malloc_extension.cc b/src/malloc_extension.cc index bf946e6..deed4cd 100644 --- a/src/malloc_extension.cc +++ b/src/malloc_extension.cc @@ -108,9 +108,9 @@ SysAllocator::~SysAllocator() {} // Default implementation -- does nothing MallocExtension::~MallocExtension() { } bool MallocExtension::VerifyAllMemory() { return true; } -bool MallocExtension::VerifyNewMemory(void* p) { return true; } -bool MallocExtension::VerifyArrayNewMemory(void* p) { return true; } -bool MallocExtension::VerifyMallocMemory(void* p) { return true; } +bool MallocExtension::VerifyNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyMallocMemory(const void* p) { return true; } bool MallocExtension::GetNumericProperty(const char* property, size_t* value) { return false; @@ -177,7 +177,7 @@ size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { return size; } -size_t MallocExtension::GetAllocatedSize(void* p) { +size_t MallocExtension::GetAllocatedSize(const void* p) { assert(GetOwnership(p) != kNotOwned); return 0; } @@ -343,9 +343,9 @@ void MallocExtension::Ranges(void* arg, RangeFunction func) { } C_SHIM(VerifyAllMemory, int, (void), ()); -C_SHIM(VerifyNewMemory, int, (void* p), (p)); -C_SHIM(VerifyArrayNewMemory, int, (void* p), (p)); -C_SHIM(VerifyMallocMemory, int, (void* p), (p)); +C_SHIM(VerifyNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyMallocMemory, int, (const void* p), (p)); C_SHIM(MallocMemoryStats, int, (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), (blocks, total, histogram)); @@ -362,7 +362,7 @@ C_SHIM(MarkThreadBusy, void, (void), ()); C_SHIM(ReleaseFreeMemory, void, (void), ()); C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes)); C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); -C_SHIM(GetAllocatedSize, size_t, (void* p), (p)); +C_SHIM(GetAllocatedSize, size_t, (const void* p), (p)); // Can't use the shim here because of the need to translate the enums. extern "C" diff --git a/src/memory_region_map.cc b/src/memory_region_map.cc index 3bf8983..e281706 100644 --- a/src/memory_region_map.cc +++ b/src/memory_region_map.cc @@ -145,6 +145,8 @@ SpinLock MemoryRegionMap::owner_lock_( // ACQUIRED_AFTER(lock_) SpinLock::LINKER_INITIALIZED); int MemoryRegionMap::recursion_count_ = 0; // GUARDED_BY(owner_lock_) pthread_t MemoryRegionMap::lock_owner_tid_; // GUARDED_BY(owner_lock_) +int64 MemoryRegionMap::map_size_ = 0; +int64 MemoryRegionMap::unmap_size_ = 0; // ========================================================================= // @@ -462,6 +464,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { reinterpret_cast<void*>(region.caller())); // Note: none of the above allocates memory. Lock(); // recursively lock + map_size_ += size; InsertRegionLocked(region); // This will (eventually) allocate storage for and copy over the stack data // from region.call_stack_data_ that is pointed by region.call_stack(). 
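Together with the heap-profiler change above, this accounting gives each dump a consistent view of mmap'ed memory: MemoryRegionMap keeps running MapSize()/UnmapSize() totals, while the per-region buckets are rebuilt only for the duration of a dump. A condensed sketch of that cycle, using only names that appear in this patch (error handling omitted):

    // Sketch of the dump cycle enabled by this patch.
    if (FLAGS_mmap_profile) {
      heap_profile->RefreshMMapData();   // bucket current mmap regions
    }
    // Totals now also reflect MemoryRegionMap::MapSize()/UnmapSize().
    int n = heap_profile->FillOrderedProfile(buf, buflen - 1);
    if (FLAGS_mmap_profile) {
      heap_profile->ClearMMapData();     // drop the snapshot after the dump
    }
    buf[n] = '\0';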
@@ -573,6 +576,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { reinterpret_cast<void*>(end_addr), regions_->size()); if (VLOG_IS_ON(12)) LogAllLocked(); + unmap_size_ += size; Unlock(); } diff --git a/src/memory_region_map.h b/src/memory_region_map.h index 739514c..988ea70 100644 --- a/src/memory_region_map.h +++ b/src/memory_region_map.h @@ -252,6 +252,10 @@ class MemoryRegionMap { static RegionIterator BeginRegionLocked(); static RegionIterator EndRegionLocked(); + // Return the accumulated sizes of mapped and unmapped regions. + static int64 MapSize() { return map_size_; } + static int64 UnmapSize() { return unmap_size_; } + // Effectively private type from our .cc ================================= // public to let us declare global objects: union RegionSetRep; @@ -286,6 +290,11 @@ class MemoryRegionMap { // The thread id of the thread that's inside the recursive lock. static pthread_t lock_owner_tid_; + // Total size of all mapped pages so far + static int64 map_size_; + // Total size of all unmapped pages so far + static int64 unmap_size_; + // helpers ================================================================== // Helper for FindRegion and FindAndMarkStackRegion: diff --git a/src/system-alloc.cc b/src/system-alloc.cc index 6379494..cf35446 100644 --- a/src/system-alloc.cc +++ b/src/system-alloc.cc @@ -61,6 +61,13 @@ # define MAP_ANONYMOUS MAP_ANON #endif +// MADV_FREE is specifically designed for use by malloc(), but only +// FreeBSD supports it; in linux we fall back to the somewhat inferior +// MADV_DONTNEED. +#ifndef MADV_FREE +# define MADV_FREE MADV_DONTNEED +#endif + // Solaris has a bug where it doesn't declare madvise() for C++. // http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 #if defined(__sun) && defined(__SVR4) @@ -107,7 +114,7 @@ union MemoryAligner { static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); -#if defined(HAVE_MMAP) || defined(MADV_DONTNEED) +#if defined(HAVE_MMAP) || defined(MADV_FREE) // Page size is initialized on demand (only needed for mmap-based allocators) static size_t pagesize = 0; #endif @@ -424,7 +431,6 @@ void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, if (result != NULL) { return result; } - Log(kLog, __FILE__, __LINE__, names_[i], "failed"); failed_[i] = true; } } @@ -488,10 +494,10 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, } void TCMalloc_SystemRelease(void* start, size_t length) { -#ifdef MADV_DONTNEED +#ifdef MADV_FREE if (FLAGS_malloc_devmem_start) { - // It's not safe to use MADV_DONTNEED if we've been mapping - // /dev/mem for heap memory + // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been + // mapping /dev/mem for heap memory. return; } if (pagesize == 0) pagesize = getpagesize(); @@ -515,7 +521,7 @@ void TCMalloc_SystemRelease(void* start, size_t length) { // Note -- ignoring most return codes, because if this fails it // doesn't matter... 
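// (Where the kernel supports it, MADV_FREE lets pages be reclaimed lazily
// and preserves their contents until memory pressure actually demands
// them, while MADV_DONTNEED discards contents immediately and makes the
// next touch take a zero-fill fault -- hence the "somewhat inferior"
// fallback above.)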
while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start, - MADV_DONTNEED) == -1 && + MADV_FREE) == -1 && errno == EAGAIN) { // NOP } diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc index cfce3a7..a83e7ea 100644 --- a/src/tcmalloc.cc +++ b/src/tcmalloc.cc @@ -289,13 +289,13 @@ void InvalidFree(void* ptr) { Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr); } -size_t InvalidGetSizeForRealloc(void* old_ptr) { +size_t InvalidGetSizeForRealloc(const void* old_ptr) { Log(kCrash, __FILE__, __LINE__, "Attempt to realloc invalid pointer", old_ptr); return 0; } -size_t InvalidGetAllocatedSize(void* ptr) { +size_t InvalidGetAllocatedSize(const void* ptr) { Log(kCrash, __FILE__, __LINE__, "Attempt to get the size of an invalid pointer", ptr); return 0; @@ -757,7 +757,7 @@ class TCMallocImplementation : public MallocExtension { // This just calls GetSizeWithCallback, but because that's in an // unnamed namespace, we need to move the definition below it in the // file. - virtual size_t GetAllocatedSize(void* ptr); + virtual size_t GetAllocatedSize(const void* ptr); // This duplicates some of the logic in GetSizeWithCallback, but is // faster. This is important on OS X, where this function is called @@ -1147,8 +1147,8 @@ inline void do_free(void* ptr) { // NOTE: some logic here is duplicated in GetOwnership (above), for // speed. If you change this function, look at that one too. -inline size_t GetSizeWithCallback(void* ptr, - size_t (*invalid_getsize_fn)(void*)) { +inline size_t GetSizeWithCallback(const void* ptr, + size_t (*invalid_getsize_fn)(const void*)) { if (ptr == NULL) return 0; const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; @@ -1173,7 +1173,7 @@ inline size_t GetSizeWithCallback(void* ptr, inline void* do_realloc_with_callback( void* old_ptr, size_t new_size, void (*invalid_free_fn)(void*), - size_t (*invalid_get_size_fn)(void*)) { + size_t (*invalid_get_size_fn)(const void*)) { // Get the size of the old entry const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); @@ -1438,7 +1438,7 @@ void* cpp_memalign(size_t align, size_t size) { } // end unnamed namespace // As promised, the definition of this function, declared above. 
-size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { +size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) { ASSERT(TCMallocImplementation::GetOwnership(ptr) != TCMallocImplementation::kNotOwned); return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); diff --git a/src/tests/sampler_test.cc b/src/tests/sampler_test.cc index 31c87cd..c55d5dc 100755 --- a/src/tests/sampler_test.cc +++ b/src/tests/sampler_test.cc @@ -357,7 +357,7 @@ bool CheckMean(size_t mean, int num_samples) { } double empirical_mean = total / static_cast<double>(num_samples); double expected_sd = mean / pow(num_samples * 1.0, 0.5); - return(abs(mean-empirical_mean) < expected_sd * kSigmas); + return(fabs(mean-empirical_mean) < expected_sd * kSigmas); } // Prints a sequence so you can look at the distribution @@ -409,8 +409,8 @@ TEST(Sampler, LargeAndSmallAllocs_CombinedTest) { size_small, kSamplingInterval); LOG(INFO) << StringPrintf("large_allocs_sds = %f\n", large_allocs_sds); LOG(INFO) << StringPrintf("small_allocs_sds = %f\n", small_allocs_sds); - CHECK_LE(abs(large_allocs_sds), kSigmas); - CHECK_LE(abs(small_allocs_sds), kSigmas); + CHECK_LE(fabs(large_allocs_sds), kSigmas); + CHECK_LE(fabs(small_allocs_sds), kSigmas); } // Tests whether the mean is about right over 1000 samples diff --git a/src/thread_cache.cc b/src/thread_cache.cc index 39f2424..d6dead3 100644 --- a/src/thread_cache.cc +++ b/src/thread_cache.cc @@ -86,7 +86,7 @@ bool kernel_supports_tls = false; // be conservative # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too void CheckIfKernelSupportsTLS() { struct utsname buf; - if (uname(&buf) != 0) { // should be impossible + if (uname(&buf) < 0) { // should be impossible Log(kLog, __FILE__, __LINE__, "uname failed assuming no TLS support (errno)", errno); kernel_supports_tls = false; diff --git a/src/windows/ia32_opcode_map.cc b/src/windows/ia32_opcode_map.cc index c9ec18b..ba6a79e 100644 --- a/src/windows/ia32_opcode_map.cc +++ b/src/windows/ia32_opcode_map.cc @@ -111,6 +111,25 @@ const Opcode s_first_opcode_byte[] = { /* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#ifdef _M_X64 + /* REX Prefixes in 64-bit mode. 
*/ + /* 0x40 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else /* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, @@ -127,6 +146,7 @@ const Opcode s_first_opcode_byte[] = { /* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#endif /* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, @@ -231,6 +251,16 @@ const Opcode s_first_opcode_byte[] = { /* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, 
AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#ifdef _M_X64 + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, @@ -239,6 +269,7 @@ const Opcode s_first_opcode_byte[] = { /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#endif /* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, /* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, diff --git a/src/windows/mini_disassembler.cc b/src/windows/mini_disassembler.cc index 30bdcc1..9e336ba 100644 --- a/src/windows/mini_disassembler.cc +++ b/src/windows/mini_disassembler.cc @@ -100,6 +100,12 @@ InstructionType MiniDisassembler::Disassemble( void MiniDisassembler::Initialize() { operand_is_32_bits_ = operand_default_is_32_bits_; address_is_32_bits_ = address_default_is_32_bits_; +#ifdef _M_X64 + operand_default_support_64_bits_ = true; +#else + operand_default_support_64_bits_ = false; +#endif + operand_is_64_bits_ = false; operand_bytes_ = 0; have_modrm_ = false; should_decode_modrm_ = false; @@ -129,6 
+135,8 @@ InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, got_f3_prefix_ = true; else if (0x66 == (*start_byte)) got_66_prefix_ = true; + else if (operand_default_support_64_bits_ && (*start_byte) & 0x48) + operand_is_64_bits_ = true; instruction_type = opcode.type_; size ++; @@ -314,8 +322,12 @@ bool MiniDisassembler::ProcessOperand(int flag_operand) { // floating point succeeded = false; break; - case OT_V: // Word or doubleword, depending on operand-size attribute. - if (operand_is_32_bits_) + case OT_V: // Word, doubleword or quadword, depending on operand-size + // attribute. + if (operand_is_64_bits_ && flag_operand & AM_I && + flag_operand & IOS_64) + operand_bytes_ += OS_QUAD_WORD; + else if (operand_is_32_bits_) operand_bytes_ += OS_DOUBLE_WORD; else operand_bytes_ += OS_WORD; diff --git a/src/windows/mini_disassembler.h b/src/windows/mini_disassembler.h index e676232..59a5d08 100644 --- a/src/windows/mini_disassembler.h +++ b/src/windows/mini_disassembler.h @@ -165,6 +165,12 @@ class MiniDisassembler { // Default address size is 32 bits if true, 16 bits if false. bool address_default_is_32_bits_; + // Determines if 64 bit operands are supported (x64). + bool operand_default_support_64_bits_; + + // Current operand size is 64 bits if true, 32 bits if false. + bool operand_is_64_bits_; + // Huge big opcode table based on the IA-32 manual, defined // in Ia32OpcodeMap.cc static const OpcodeTable s_ia32_opcode_map_[]; diff --git a/src/windows/mini_disassembler_types.h b/src/windows/mini_disassembler_types.h index 7f8e997..83dee8b 100644 --- a/src/windows/mini_disassembler_types.h +++ b/src/windows/mini_disassembler_types.h @@ -143,6 +143,16 @@ enum OperandType { OT_ADDRESS_MODE_M = 0x80000000 }; +// Flag that indicates if an immediate operand is 64-bits. +// +// The Intel 64 and IA-32 Architecture Software Developer's Manual currently +// defines MOV as the only instruction supporting a 64-bit immediate operand. +enum ImmediateOperandSize { + IOS_MASK = 0x0000F000, + IOS_DEFAULT = 0x0, + IOS_64 = 0x00001000 +}; + // Everything that's in an Opcode (see below) except the three // alternative opcode structs for different prefixes. struct SpecificOpcode { @@ -154,8 +164,8 @@ struct SpecificOpcode { InstructionType type_; // Description of the type of the dest, src and aux operands, - // put together from an enOperandType flag and an enAddressingMethod - // flag. + // put together from enOperandType, enAddressingMethod and + // enImmediateOperandSize flags. 
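+ // For example, the 64-bit MOV entries added to ia32_opcode_map.cc use
+ // AM_I | OT_V | IOS_64 to mark an 8-byte immediate operand.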
int flag_dest_; int flag_source_; int flag_aux_; diff --git a/src/windows/patch_functions.cc b/src/windows/patch_functions.cc index f837e7a..3ff6eb5 100644 --- a/src/windows/patch_functions.cc +++ b/src/windows/patch_functions.cc @@ -181,6 +181,8 @@ class LibcInfo { kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow, // These are windows-only functions from malloc.h k_Msize, k_Expand, + // A MS CRT "internal" function, implemented using _calloc_impl + k_CallocCrt, kNumFunctions }; @@ -404,7 +406,7 @@ const char* const LibcInfo::function_name_[] = { NULL, // kMangledNewArrayNothrow, NULL, // kMangledDeleteNothrow, NULL, // kMangledDeleteArrayNothrow, - "_msize", "_expand", + "_msize", "_expand", "_calloc_crt", }; // For mingw, I can't patch the new/delete here, because the @@ -435,6 +437,7 @@ const GenericFnPtr LibcInfo::static_fn_[] = { #endif (GenericFnPtr)&::_msize, (GenericFnPtr)&::_expand, + (GenericFnPtr)&::calloc, }; template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = { @@ -457,6 +460,7 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { (GenericFnPtr)&Perftools_deletearray_nothrow, (GenericFnPtr)&Perftools__msize, (GenericFnPtr)&Perftools__expand, + (GenericFnPtr)&Perftools_calloc, }; /*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = { diff --git a/src/windows/preamble_patcher.cc b/src/windows/preamble_patcher.cc index 78a4763..b27a95b 100644 --- a/src/windows/preamble_patcher.cc +++ b/src/windows/preamble_patcher.cc @@ -29,6 +29,7 @@ * * --- * Author: Joi Sigurdsson + * Author: Scott Francis * * Implementation of PreamblePatcher */ @@ -46,18 +47,42 @@ #define ASM_JMP32ABS_0 0xFF #define ASM_JMP32ABS_1 0x25 #define ASM_JMP8REL 0xEB +#define ASM_JCC32REL_0 0x0F +#define ASM_JCC32REL_1_MASK 0x80 +#define ASM_NOP 0x90 +// X64 opcodes +#define ASM_REXW 0x48 +#define ASM_MOVRAX_IMM 0xB8 +#define ASM_JMP 0xFF +#define ASM_JMP_RAX 0xE0 namespace sidestep { +PreamblePatcher::PreamblePage* PreamblePatcher::preamble_pages_ = NULL; +long PreamblePatcher::granularity_ = 0; +long PreamblePatcher::pagesize_ = 0; +bool PreamblePatcher::initialized_ = false; + +static const unsigned int kPreamblePageMagic = 0x4347414D; // "MAGC" + // Handle a special case that we see with functions that point into an // IAT table (including functions linked statically into the // application): these functions already start with ASM_JMP32*. For // instance, malloc() might be implemented as a JMP to __malloc(). // This function follows the initial JMPs for us, until we get to the // place where the actual code is defined. If we get to STOP_BEFORE, -// we return the address before stop_before. +// we return the address before stop_before. The stop_before_trampoline +// flag is used in 64-bit mode. If true, we will return the address +// before a trampoline is detected. Trampolines are defined as: +// +// nop +// mov rax, <replacement_function> +// jmp rax +// +// See PreamblePatcher::RawPatchWithStub for more information. void* PreamblePatcher::ResolveTargetImpl(unsigned char* target, - unsigned char* stop_before) { + unsigned char* stop_before, + bool stop_before_trampoline) { if (target == NULL) return NULL; while (1) { @@ -81,15 +106,26 @@ void* PreamblePatcher::ResolveTargetImpl(unsigned char* target, // Visual Studio seems to sometimes do it this way instead of the // previous way. Not sure what the rules are, but it was happening // with operator new in some binaries.
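// (In 64-bit mode the FF 25 jump is RIP-relative: its 4-byte displacement
// is counted from the end of the 6-byte instruction, which is why the
// 64-bit branch below computes target + target_offset + 6.)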
- void **new_target_v; - SIDESTEP_ASSERT(sizeof(new_target) == 4); - memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4); + void** new_target_v; + if (kIs64BitBinary) { + // In 64-bit mode JMPs are RIP-relative, not absolute + int target_offset; + memcpy(reinterpret_cast<void*>(&target_offset), + reinterpret_cast<void*>(target + 2), 4); + new_target_v = reinterpret_cast<void**>(target + target_offset + 6); + } else { + SIDESTEP_ASSERT(sizeof(new_target) == 4); + memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4); + } new_target = reinterpret_cast<unsigned char*>(*new_target_v); } else { break; } if (new_target == stop_before) break; + if (stop_before_trampoline && *new_target == ASM_NOP + && new_target[1] == ASM_REXW && new_target[2] == ASM_MOVRAX_IMM) + break; target = new_target; } return target; @@ -103,7 +139,7 @@ class DeleteUnsignedCharArray { ~DeleteUnsignedCharArray() { if (array_) { - delete [] array_; + PreamblePatcher::FreePreambleBlock(array_); } } @@ -191,9 +227,23 @@ SideStepError PreamblePatcher::RawPatch(void* target_function, return SIDESTEP_INVALID_PARAMETER; } - // @see MAX_PREAMBLE_STUB_SIZE for an explanation of how we arrives at - // this size - unsigned char* preamble_stub = new unsigned char[MAX_PREAMBLE_STUB_SIZE]; + BOOL succeeded = FALSE; + + // First, deal with a special case that we see with functions that + // point into an IAT table (including functions linked statically + // into the application): these functions already start with + // ASM_JMP32REL. For instance, malloc() might be implemented as a + // JMP to __malloc(). In that case, we replace the destination of + // the JMP (__malloc), rather than the JMP itself (malloc). This + // way we get the correct behavior no matter how malloc gets called. + void* new_target = ResolveTarget(target_function); + if (new_target != target_function) { + target_function = new_target; + } + + // In 64-bit mode, preamble_stub must be within 2GB of target function + // so that if target contains a jump, we can translate it. + unsigned char* preamble_stub = AllocPreambleBlockNear(target_function); + if (!preamble_stub) { SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub."); return SIDESTEP_INSUFFICIENT_BUFFER; @@ -202,19 +252,6 @@ SideStepError PreamblePatcher::RawPatch(void* target_function, // Frees the array at end of scope. DeleteUnsignedCharArray guard_preamble_stub(preamble_stub); - // Change the protection of the newly allocated preamble stub to - // PAGE_EXECUTE_READWRITE. This is required to work with DEP (Data - // Execution Prevention) which will cause an exception if code is executed - // from a page on which you do not have read access. - DWORD old_stub_protect = 0; - BOOL succeeded = ::VirtualProtect(preamble_stub, MAX_PREAMBLE_STUB_SIZE, - PAGE_EXECUTE_READWRITE, &old_stub_protect); - if (!succeeded) { - SIDESTEP_ASSERT(false && - "Failed to make page preamble stub read-write-execute."); - return SIDESTEP_ACCESS_DENIED; - } - SideStepError error_code = RawPatchWithStubAndProtections( target_function, replacement_function, preamble_stub, MAX_PREAMBLE_STUB_SIZE, NULL); @@ -260,23 +297,6 @@ SideStepError PreamblePatcher::Unpatch(void* target_function, return SIDESTEP_INVALID_PARAMETER; } - // We disassemble the preamble of the _stub_ to see how many bytes we - // originally copied to the stub.
- MiniDisassembler disassembler; - unsigned int preamble_bytes = 0; - while (preamble_bytes < 5) { - InstructionType instruction_type = - disassembler.Disassemble( - reinterpret_cast<unsigned char*>(original_function_stub) + - preamble_bytes, - preamble_bytes); - if (IT_GENERIC != instruction_type) { - SIDESTEP_ASSERT(false && - "Should only have generic instructions in stub!!"); - return SIDESTEP_UNSUPPORTED_INSTRUCTION; - } - } - // Before unpatching, target_function should be a JMP to // replacement_function. If it's not, then either it's an error, or // we're falling into the case where the original instruction was a @@ -286,7 +306,8 @@ SideStepError PreamblePatcher::Unpatch(void* target_function, unsigned char* target = reinterpret_cast<unsigned char*>(target_function); target = reinterpret_cast<unsigned char*>( ResolveTargetImpl( - target, reinterpret_cast<unsigned char*>(replacement_function))); + target, reinterpret_cast<unsigned char*>(replacement_function), + true)); // We should end at the function we patched. When we patch, we insert // an ASM_JMP32REL instruction, so look for that as a sanity check. if (target[0] != ASM_JMP32REL) { @@ -295,11 +316,13 @@ return SIDESTEP_INVALID_PARAMETER; } + const unsigned int kRequiredTargetPatchBytes = 5; + // We need to be able to write to a process-local copy of the first - // MAX_PREAMBLE_STUB_SIZE bytes of target_function + // kRequiredTargetPatchBytes bytes of target_function DWORD old_target_function_protect = 0; - BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), - MAX_PREAMBLE_STUB_SIZE, + BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), + kRequiredTargetPatchBytes, PAGE_EXECUTE_READWRITE, &old_target_function_protect); if (!succeeded) { @@ -308,20 +331,67 @@ return SIDESTEP_ACCESS_DENIED; } - // Replace the first few bytes of the original function with the bytes we - // previously moved to the preamble stub. - memcpy(reinterpret_cast<void*>(target), - original_function_stub, preamble_bytes); + unsigned char* preamble_stub = reinterpret_cast<unsigned char*>( + original_function_stub); - // Stub is now useless so delete it. - // [csilvers: Commented out for perftools because it causes big problems - // when we're unpatching malloc. We just let this live on as a leak.] - //delete [] reinterpret_cast<unsigned char*>(original_function_stub); + // Disassemble the preamble of the stub and copy the bytes back to target. + // If we've done any conditional jumps in the preamble we need to convert + // them back to the original REL8 jumps in the target.
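+ // (The widened jumps in the stub carry displacements computed from the
+ // stub's own address; translating them back re-relativizes them for the
+ // target and keeps the restored preamble within the
+ // kRequiredTargetPatchBytes window.)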
+ MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + unsigned int target_bytes = 0; + while (target_bytes < kRequiredTargetPatchBytes) { + unsigned int cur_bytes = 0; + InstructionType instruction_type = + disassembler.Disassemble(preamble_stub + preamble_bytes, cur_bytes); + if (IT_JUMP == instruction_type) { + unsigned int jump_bytes = 0; + SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION; + if (IsNearConditionalJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearAbsoluteCall(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeCall(preamble_stub + preamble_bytes, cur_bytes)) { + jump_ret = PatchNearJumpOrCall(preamble_stub + preamble_bytes, + cur_bytes, target + target_bytes, + &jump_bytes, MAX_PREAMBLE_STUB_SIZE); + } + if (jump_ret == SIDESTEP_JUMP_INSTRUCTION) { + SIDESTEP_ASSERT(false && + "Found unsupported jump instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + target_bytes += jump_bytes; + } else if (IT_GENERIC == instruction_type) { + if (IsMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes)) { + unsigned int mov_bytes = 0; + if (PatchMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes, + target + target_bytes, &mov_bytes, + MAX_PREAMBLE_STUB_SIZE) + != SIDESTEP_SUCCESS) { + SIDESTEP_ASSERT(false && + "Found unsupported generic instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + } else { + memcpy(reinterpret_cast<void*>(target + target_bytes), + reinterpret_cast<void*>(reinterpret_cast<unsigned char*>( + original_function_stub) + preamble_bytes), cur_bytes); + target_bytes += cur_bytes; + } + } else { + SIDESTEP_ASSERT(false && + "Found unsupported instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + preamble_bytes += cur_bytes; + } - // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of + FreePreambleBlock(reinterpret_cast<unsigned char*>(original_function_stub)); + + // Restore the protection of the first kRequiredTargetPatchBytes bytes of // target to what they were before we started goofing around. succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), - MAX_PREAMBLE_STUB_SIZE, + kRequiredTargetPatchBytes, old_target_function_protect, &old_target_function_protect); @@ -341,4 +411,274 @@ SideStepError PreamblePatcher::Unpatch(void* target_function, return SIDESTEP_SUCCESS; } +void PreamblePatcher::Initialize() { + if (!initialized_) { + SYSTEM_INFO si = { 0 }; + ::GetSystemInfo(&si); + granularity_ = si.dwAllocationGranularity; + pagesize_ = si.dwPageSize; + initialized_ = true; + } +} + +unsigned char* PreamblePatcher::AllocPreambleBlockNear(void* target) { + PreamblePage* preamble_page = preamble_pages_; + while (preamble_page != NULL) { + if (preamble_page->free_ != NULL) { + __int64 val = reinterpret_cast<__int64>(preamble_page) - + reinterpret_cast<__int64>(target); + if ((val > 0 && val + pagesize_ <= INT_MAX) || + (val < 0 && val >= INT_MIN)) { + break; + } + } + preamble_page = preamble_page->next_; + } + + // The free_ member of the page is used to store the next available block + // of memory to use or NULL if there are no chunks available, in which case + // we'll allocate a new page. 
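+ // (Each page is carved into MAX_PREAMBLE_STUB_SIZE-byte chunks linked
+ // into an intrusive free list: the first word of every free chunk
+ // points at the next free chunk, and the PreamblePage header lives in
+ // the page's first chunk, so allocation below is a simple list pop.)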
+  if (preamble_page == NULL || preamble_page->free_ == NULL) {
+    // Create a new preamble page and initialize the free list
+    preamble_page = reinterpret_cast<PreamblePage*>(AllocPageNear(target));
+    SIDESTEP_ASSERT(preamble_page != NULL && "Could not allocate page!");
+    void** pp = &preamble_page->free_;
+    unsigned char* ptr = reinterpret_cast<unsigned char*>(preamble_page) +
+        MAX_PREAMBLE_STUB_SIZE;
+    unsigned char* limit = reinterpret_cast<unsigned char*>(preamble_page) +
+        pagesize_;
+    while (ptr < limit) {
+      *pp = ptr;
+      pp = reinterpret_cast<void**>(ptr);
+      ptr += MAX_PREAMBLE_STUB_SIZE;
+    }
+    *pp = NULL;
+    // Insert the new page into the list
+    preamble_page->magic_ = kPreamblePageMagic;
+    preamble_page->next_ = preamble_pages_;
+    preamble_pages_ = preamble_page;
+  }
+  unsigned char* ret = reinterpret_cast<unsigned char*>(preamble_page->free_);
+  preamble_page->free_ = *(reinterpret_cast<void**>(preamble_page->free_));
+  return ret;
+}
+
+void PreamblePatcher::FreePreambleBlock(unsigned char* block) {
+  SIDESTEP_ASSERT(block != NULL);
+  SIDESTEP_ASSERT(granularity_ != 0);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(block);
+  ptr -= ptr & (granularity_ - 1);
+  PreamblePage* preamble_page = reinterpret_cast<PreamblePage*>(ptr);
+  SIDESTEP_ASSERT(preamble_page->magic_ == kPreamblePageMagic);
+  *(reinterpret_cast<void**>(block)) = preamble_page->free_;
+  preamble_page->free_ = block;
+}
+
+void* PreamblePatcher::AllocPageNear(void* target) {
+  MEMORY_BASIC_INFORMATION mbi = { 0 };
+  if (!::VirtualQuery(target, &mbi, sizeof(mbi))) {
+    SIDESTEP_ASSERT(false && "VirtualQuery failed on target address");
+    return 0;
+  }
+  if (initialized_ == false) {
+    PreamblePatcher::Initialize();
+    SIDESTEP_ASSERT(initialized_);
+  }
+  void* pv = NULL;
+  unsigned char* allocation_base = reinterpret_cast<unsigned char*>(
+      mbi.AllocationBase);
+  __int64 i = 1;
+  bool high_target = reinterpret_cast<__int64>(target) > UINT_MAX;
+  while (pv == NULL) {
+    __int64 val = reinterpret_cast<__int64>(allocation_base) -
+        (i * granularity_);
+    if (high_target &&
+        reinterpret_cast<__int64>(target) - val > INT_MAX) {
+      // We're further than 2GB from the target
+      break;
+    } else if (val <= 0) {
+      // Less than 0
+      break;
+    }
+    pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base -
+                            (i++ * granularity_)),
+                        pagesize_, MEM_COMMIT | MEM_RESERVE,
+                        PAGE_EXECUTE_READWRITE);
+  }
+
+  // We couldn't allocate low, try to allocate high
+  if (pv == NULL) {
+    i = 1;
+    // Round up to the next multiple of page granularity
+    allocation_base = reinterpret_cast<unsigned char*>(
+        (reinterpret_cast<__int64>(target) &
+         (~(granularity_ - 1))) + granularity_);
+    while (pv == NULL) {
+      __int64 val = reinterpret_cast<__int64>(allocation_base) +
+          (i * granularity_) - reinterpret_cast<__int64>(target);
+      if (val > INT_MAX || val < 0) {
+        // We're too far or we overflowed
+        break;
+      }
+      pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base +
+                              (i++ * granularity_)),
+                          pagesize_, MEM_COMMIT | MEM_RESERVE,
+                          PAGE_EXECUTE_READWRITE);
+    }
+  }
+  return pv;
+}
+
+bool PreamblePatcher::IsShortConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+}
+
+bool PreamblePatcher::IsNearConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xf && (*(target + 1) & 0x80) == 0x80 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
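+  // JMP rel32: opcode 0xE9 followed by a 4-byte displacement, 5 bytes total.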
+  return *(target) == 0xe9 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsNearAbsoluteCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xff && (*(target + 1) & 0x10) == 0x10 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xe8 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsMovWithDisplacement(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  // In this case, the ModRM byte's mod field will be 0 and r/m will be 101b (5)
+  return instruction_size == 7 && *target == 0x48 && *(target + 1) == 0x8b &&
+      (*(target + 2) >> 6) == 0 && (*(target + 2) & 0x7) == 5;
+}
+
+SideStepError PreamblePatcher::PatchShortConditionalJump(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  unsigned char* original_jump_dest = (source + 2) + source[1];
+  unsigned char* stub_jump_from = target + 6;
+  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+                    "Unable to fix up short jump because target"
+                    " is too far away.");
+    return SIDESTEP_JUMP_INSTRUCTION;
+  }
+
+  *target_bytes = 6;
+  if (target_size > *target_bytes) {
+    // Convert the short jump to a near jump.
+    //
+    // 0f 8x xx xx xx xx = Jcc rel32off
+    unsigned short jmpcode = ((0x80 | (source[0] & 0xf)) << 8) | 0x0f;
+    memcpy(reinterpret_cast<void*>(target),
+           reinterpret_cast<void*>(&jmpcode), 2);
+    memcpy(reinterpret_cast<void*>(target + 2),
+           reinterpret_cast<void*>(&fixup_jump_offset), 4);
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchNearJumpOrCall(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  SIDESTEP_ASSERT(instruction_size == 5 || instruction_size == 6);
+  unsigned int jmp_offset_in_instruction = instruction_size == 5 ? 1 : 2;
+  unsigned char* original_jump_dest = reinterpret_cast<unsigned char *>(
+      reinterpret_cast<__int64>(source + instruction_size) +
+      *(reinterpret_cast<int*>(source + jmp_offset_in_instruction)));
+  unsigned char* stub_jump_from = target + instruction_size;
+  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+                    "Unable to fix up near jump because target"
+                    " is too far away.");
+    return SIDESTEP_JUMP_INSTRUCTION;
+  }
+
+  if ((fixup_jump_offset < SCHAR_MAX && fixup_jump_offset > SCHAR_MIN)) {
+    *target_bytes = 2;
+    if (target_size > *target_bytes) {
+      // If the new offset is in range, use a short jump instead of a near jump.
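+      // A near Jcc (0F 8x rel32) shrinks to its two-byte short form
+      // (7x rel8); anything else is emitted as a two-byte JMP rel8 (EB rel8).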
+      if (source[0] == ASM_JCC32REL_0 &&
+          (source[1] & ASM_JCC32REL_1_MASK) == ASM_JCC32REL_1_MASK) {
+        unsigned short jmpcode = (static_cast<unsigned char>(
+            fixup_jump_offset) << 8) | (0x70 | (source[1] & 0xf));
+        memcpy(reinterpret_cast<void*>(target),
+               reinterpret_cast<void*>(&jmpcode),
+               2);
+      } else {
+        target[0] = ASM_JMP8REL;
+        target[1] = static_cast<unsigned char>(fixup_jump_offset);
+      }
+    }
+  } else {
+    *target_bytes = instruction_size;
+    if (target_size > *target_bytes) {
+      memcpy(reinterpret_cast<void*>(target),
+             reinterpret_cast<void*>(source),
+             jmp_offset_in_instruction);
+      memcpy(reinterpret_cast<void*>(target + jmp_offset_in_instruction),
+             reinterpret_cast<void*>(&fixup_jump_offset),
+             4);
+    }
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchMovWithDisplacement(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  SIDESTEP_ASSERT(instruction_size == 7);
+  const int mov_offset_in_instruction = 3;  // 0x48 0x8b 0x0d <offset>
+  unsigned char* original_mov_dest = reinterpret_cast<unsigned char*>(
+      reinterpret_cast<__int64>(source + instruction_size) +
+      *(reinterpret_cast<int*>(source + mov_offset_in_instruction)));
+  unsigned char* stub_mov_from = target + instruction_size;
+  __int64 fixup_mov_offset = original_mov_dest - stub_mov_from;
+  if (fixup_mov_offset > INT_MAX || fixup_mov_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+        "Unable to fix up near MOV because target is too far away.");
+    return SIDESTEP_UNEXPECTED;
+  }
+  *target_bytes = instruction_size;
+  if (target_size > *target_bytes) {
+    memcpy(reinterpret_cast<void*>(target),
+           reinterpret_cast<void*>(source),
+           mov_offset_in_instruction);
+    memcpy(reinterpret_cast<void*>(target + mov_offset_in_instruction),
+           reinterpret_cast<void*>(&fixup_mov_offset),
+           4);
+  }
+  return SIDESTEP_SUCCESS;
+}
+
 };  // namespace sidestep
diff --git a/src/windows/preamble_patcher.h b/src/windows/preamble_patcher.h
index 0028e4e..50946bd 100644
--- a/src/windows/preamble_patcher.h
+++ b/src/windows/preamble_patcher.h
@@ -29,6 +29,7 @@
  *
  * ---
  * Author: Joi Sigurdsson
+ * Author: Scott Francis
  *
  * Definition of PreamblePatcher
  */
@@ -47,7 +48,25 @@
 // bytes of the function.  Considering the worst case scenario, we need 4
 // bytes + the max instruction size + 5 more bytes for our jump back to
 // the original code.  With that in mind, 32 is a good number :)
+#ifdef _M_X64
+// In 64-bit mode we may need more room.  In 64-bit mode all jumps must be
+// within +/-2GB of RIP.  Because of this limitation we may need to use a
+// trampoline to jump to the replacement function if it is further than 2GB
+// away from the target.  The trampoline is 13 bytes.
+//
+// So 4 bytes + max instruction size (17 bytes) + 5 bytes to jump back to the
+// original code + trampoline size.  64 bytes is a nice number :-)
+#define MAX_PREAMBLE_STUB_SIZE (64)
+#else
 #define MAX_PREAMBLE_STUB_SIZE (32)
+#endif
+
+// Determines if this is a 64-bit binary.
+#ifdef _M_X64
+static const bool kIs64BitBinary = true;
+#else
+static const bool kIs64BitBinary = false;
+#endif
 
 namespace sidestep {
 
@@ -68,6 +87,8 @@ enum SideStepError {
 #define SIDESTEP_TO_HRESULT(error)                      \
   MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error)
 
+class DeleteUnsignedCharArray;
+
 // Implements a patching mechanism that overwrites the first few bytes of
 // a function preamble with a jump to our hook function, which is then
 // able to call the original function via a specially-made preamble-stub
@@ -287,7 +308,55 @@ class PreamblePatcher {
     return (T)ResolveTargetImpl((unsigned char*)target_function, NULL);
   }
 
+  // Allocates a block of memory of size MAX_PREAMBLE_STUB_SIZE that is as
+  // close (within 2GB) as possible to target.  This is done to ensure that
+  // we can perform a relative jump from target to a trampoline if the
+  // replacement function is more than 2GB away from target.  This means that
+  // we only need to patch 5 bytes in the target function.
+  //
+  // @param target  Pointer to target function.
+  //
+  // @return  Returns a block of memory of size MAX_PREAMBLE_STUB_SIZE that
+  //          can be used to store a function preamble block.
+  static unsigned char* AllocPreambleBlockNear(void* target);
+
+  // Frees a block allocated by AllocPreambleBlockNear.
+  //
+  // @param block  Block that was returned by AllocPreambleBlockNear.
+  static void FreePreambleBlock(unsigned char* block);
+
  private:
+  friend class DeleteUnsignedCharArray;
+
+  // Used to store data allocated for preamble stubs
+  struct PreamblePage {
+    unsigned int magic_;
+    PreamblePage* next_;
+    // This member points to a linked list of free blocks within the page
+    // or NULL if at the end
+    void* free_;
+  };
+
+  // In 64-bit mode, the jump we patch into the target must land within
+  // 2GB of the target in order to only require 5 bytes for the function
+  // patch.  To meet this requirement we're creating an allocator within
+  // this class to allocate blocks that are within 2GB of a given target.
+  // This member is the head of a linked list of pages used to allocate
+  // blocks that are within 2GB of the target.
+  static PreamblePage* preamble_pages_;
+
+  // Page granularity
+  static long granularity_;
+
+  // Page size
+  static long pagesize_;
+
+  // Determines if the patcher has been initialized.
+  static bool initialized_;
+
+  // Used to initialize static members.
+  static void Initialize();
+
   // Patches a function by overwriting its first few bytes with
   // a jump to a different function.  This is similar to the RawPatch
   // function except that it uses the stub allocated by the caller
@@ -318,7 +387,7 @@ class PreamblePatcher {
   // @return An error code indicating the result of patching.
   static SideStepError RawPatchWithStubAndProtections(
       void* target_function,
-      void *replacement_function,
+      void* replacement_function,
       unsigned char* preamble_stub,
       unsigned long stub_size,
       unsigned long* bytes_needed);
@@ -348,7 +417,7 @@ class PreamblePatcher {
   //
   // @return An error code indicating the result of patching.
   static SideStepError RawPatchWithStub(void* target_function,
-                                        void *replacement_function,
+                                        void* replacement_function,
                                         unsigned char* preamble_stub,
                                         unsigned long stub_size,
                                         unsigned long* bytes_needed);
@@ -365,12 +434,175 @@ class PreamblePatcher {
   // target_function, we get to the address stop, we return
   // immediately, the address that jumps to stop_before.
   //
+  // @param stop_before_trampoline  When following JMP instructions from
+  //   target_function, stop before a trampoline is detected.
+  //   See comment in PreamblePatcher::RawPatchWithStub for more
+  //   information.  This parameter has no effect in 32-bit mode.
+  //
   // @return Either target_function (the input parameter), or if
   // target_function's body consists entirely of a JMP instruction,
   // the address it JMPs to (or more precisely, the address at the end
   // of a chain of JMPs).
   static void* ResolveTargetImpl(unsigned char* target_function,
-                                 unsigned char* stop_before);
+                                 unsigned char* stop_before,
+                                 bool stop_before_trampoline = false);
+
+  // Helper routine that attempts to allocate a page as close (within 2GB)
+  // as possible to target.
+  //
+  // @param target  Pointer to target function.
+  //
+  // @return  Returns an address that is within 2GB of target.
+  static void* AllocPageNear(void* target);
+
+  // Helper routine that determines if a target instruction is a short
+  // conditional jump.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a short conditional jump.
+  static bool IsShortConditionalJump(unsigned char* target,
+                                     unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // conditional jump.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near conditional jump.
+  static bool IsNearConditionalJump(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // relative jump.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near relative jump.
+  static bool IsNearRelativeJump(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // absolute call.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near absolute call.
+  static bool IsNearAbsoluteCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // relative call.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near relative call.
+  static bool IsNearRelativeCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a 64-bit MOV
+  // that uses a RIP-relative displacement.
+  //
+  // @param target  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a MOV with displacement.
+  static bool IsMovWithDisplacement(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that converts a short conditional jump instruction
+  // to a near conditional jump in a target buffer.  Note that the target
+  // buffer must be within 2GB of the source for the near jump to work.
+  //
+  // A short conditional jump instruction is in the format:
+  // 7x xx = Jcc rel8off
+  //
+  // @param source  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction.
+  //
+  // @param target  Target buffer to write the new instruction.
+  //
+  // @param target_bytes  Pointer to a buffer that contains the size
+  //                      of the target instruction, in bytes.
+  //
+  // @param target_size  Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchShortConditionalJump(unsigned char* source,
+                                                 unsigned int instruction_size,
+                                                 unsigned char* target,
+                                                 unsigned int* target_bytes,
+                                                 unsigned int target_size);
+
+  // Helper routine that converts various jump-like instructions to
+  // corresponding instructions in the target buffer.  What this routine does
+  // is fix up the relative offsets contained in jump instructions to point
+  // back to the original target routine.  Like with
+  // PatchShortConditionalJump, the target buffer must be within 2GB of the
+  // source.
+  //
+  // We currently handle the following instructions:
+  //
+  // E9 xx xx xx xx    = JMP rel32off
+  // 0F 8x xx xx xx xx = Jcc rel32off
+  // FF /2 xx xx xx xx = CALL reg/mem32/mem64
+  // E8 xx xx xx xx    = CALL rel32off
+  //
+  // It should not be hard to update this function to support other
+  // instructions that jump to relative targets.
+  //
+  // @param source  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction.
+  //
+  // @param target  Target buffer to write the new instruction.
+  //
+  // @param target_bytes  Pointer to a buffer that contains the size
+  //                      of the target instruction, in bytes.
+  //
+  // @param target_size  Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchNearJumpOrCall(unsigned char* source,
+                                           unsigned int instruction_size,
+                                           unsigned char* target,
+                                           unsigned int* target_bytes,
+                                           unsigned int target_size);
+
+  // Helper routine that patches a 64-bit MOV instruction with a RIP-relative
+  // displacement.  The target buffer must be within 2GB of the source.
+  //
+  // 48 8B 0D XX XX XX XX = MOV reg, [RIP + rel32off]
+  //
+  // @param source  Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction.
+  //
+  // @param target  Target buffer to write the new instruction.
+  //
+  // @param target_bytes  Pointer to a buffer that contains the size
+  //                      of the target instruction, in bytes.
+  //
+  // @param target_size  Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchMovWithDisplacement(unsigned char* source,
+                                                unsigned int instruction_size,
+                                                unsigned char* target,
+                                                unsigned int* target_bytes,
+                                                unsigned int target_size);
 };
 
 };  // namespace sidestep
diff --git a/src/windows/preamble_patcher_test.cc b/src/windows/preamble_patcher_test.cc
new file mode 100644
index 0000000..2506cfb
--- /dev/null
+++ b/src/windows/preamble_patcher_test.cc
@@ -0,0 +1,356 @@
+/* Copyright (c) 2011, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials provided
+ *       with the distribution.
+ *     * Neither the name of Google Inc.
+ *       nor the names of its contributors may be used to endorse or
+ *       promote products derived from this software without specific prior
+ *       written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ * Author: Scott Francis
+ *
+ * Unit tests for PreamblePatcher
+ */
+
+#include "preamble_patcher.h"
+#include "mini_disassembler.h"
+#pragma warning(push)
+#pragma warning(disable:4553)
+#include "auto_testing_hook.h"
+#pragma warning(pop)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+
+// Turning off all optimizations for this file, since the official build's
+// "Whole program optimization" seems to cause the TestPatchUsingDynamicStub
+// test to crash with an access violation. We debugged this and found
+// that the optimized code accesses a register that is changed by a call to
+// the hook function.
+#pragma optimize("", off)
+
+namespace {
+
+// Function for testing - this is what we patch
+//
+// NOTE: Because of the way the compiler optimizes this function in
+// release builds, we need to use a different input value every time we
+// call it within a function, otherwise the compiler will just reuse the
+// last calculated incremented value.
+int __declspec(noinline) IncrementNumber(int i) {
+#ifdef _M_X64
+ __int64 i2 = i + 1;
+ return (int) i2;
+#else
+ return i + 1;
+#endif
+}
+
+extern "C" int TooShortFunction(int);
+
+extern "C" int JumpShortCondFunction(int);
+
+extern "C" int JumpNearCondFunction(int);
+
+extern "C" int JumpAbsoluteFunction(int);
+
+extern "C" int CallNearRelativeFunction(int);
+
+typedef int (*IncrementingFunc)(int);
+IncrementingFunc original_function = NULL;
+
+int HookIncrementNumber(int i) {
+ SIDESTEP_ASSERT(original_function != NULL);
+ int incremented_once = original_function(i);
+ return incremented_once + 1;
+}
+
+// For the AutoTestingHook test, we can't use original_function, because
+// all of that is encapsulated.
+// This function "increments" by 10, just to set it apart from the other
+// functions.
+int __declspec(noinline) AutoHookIncrementNumber(int i) {
+ return i + 10;
+}
+
+}; // namespace
+
+namespace sidestep {
+
+bool TestDisassembler() {
+ unsigned int instruction_size = 0;
+ sidestep::MiniDisassembler disassembler;
+ void * target = reinterpret_cast<unsigned char *>(IncrementNumber);
+ void * new_target = PreamblePatcher::ResolveTarget(target);
+ if (target != new_target)
+ target = new_target;
+
+ while (1) {
+ sidestep::InstructionType instructionType = disassembler.Disassemble(
+ reinterpret_cast<unsigned char *>(target) + instruction_size,
+ instruction_size);
+ if (sidestep::IT_RETURN == instructionType) {
+ return true;
+ }
+ }
+}
+
+bool TestPatchWithLongJump() {
+ original_function = NULL;
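+  // 0x0000020000000000 is far outside the +/-2GB RIP-relative range of code
+  // loaded at any conventional image base, so routing IncrementNumber to a
+  // "replacement" up here exercises the 64-bit trampoline path.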
+ void *p = ::VirtualAlloc(reinterpret_cast<void *>(0x0000020000000000), 4096,
+ MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+ SIDESTEP_EXPECT_TRUE(p != NULL);
+ memset(p, 0xcc, 4096);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ (IncrementingFunc) p,
+ &original_function));
+ SIDESTEP_ASSERT((*original_function)(1) == 2);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ (IncrementingFunc) p,
+ original_function));
+ ::VirtualFree(p, 0, MEM_RELEASE);
+ return true;
+}
+
+bool TestPatchWithPreambleShortCondJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpShortCondFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpShortCondFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCondJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpNearCondFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpNearCondFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleAbsoluteJump() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(JumpAbsoluteFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(JumpAbsoluteFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCall() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(
+ CallNearRelativeFunction,
+ HookIncrementNumber,
+ &original_function));
+ (*original_function)(0);
+ (*original_function)(1);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(
+ CallNearRelativeFunction,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool TestPatchUsingDynamicStub() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &original_function));
+ SIDESTEP_EXPECT_TRUE(original_function);
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 4);
+ SIDESTEP_EXPECT_TRUE(original_function(3) == 4);
+
+ // Clearbox test to see that the function has been patched.
+ sidestep::MiniDisassembler disassembler;
+ unsigned int instruction_size = 0;
+ SIDESTEP_EXPECT_TRUE(sidestep::IT_JUMP == disassembler.Disassemble(
+ reinterpret_cast<unsigned char*>(IncrementNumber),
+ instruction_size));
+
+ // Since we patched IncrementNumber, its first statement is a
+  // jmp to the hook function.  So verify that we now cannot patch
+ // IncrementNumber because it starts with a jump.
+#if 0
+ IncrementingFunc dummy = NULL;
+ // TODO(joi@chromium.org): restore this test once flag is added to
+ // disable JMP following
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_JUMP_INSTRUCTION ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &dummy));
+
+ // This test disabled because code in preamble_patcher_with_stub.cc
+ // asserts before returning the error code -- so there is no way
+ // to get an error code here, in debug build.
+ dummy = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_FUNCTION_TOO_SMALL ==
+ sidestep::PreamblePatcher::Patch(TooShortFunction,
+ HookIncrementNumber,
+ &dummy));
+#endif
+
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ HookIncrementNumber,
+ original_function));
+ return true;
+}
+
+bool PatchThenUnpatch() {
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Patch(IncrementNumber,
+ HookIncrementNumber,
+ &original_function));
+ SIDESTEP_EXPECT_TRUE(original_function);
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 3);
+ SIDESTEP_EXPECT_TRUE(original_function(2) == 3);
+
+ SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+ sidestep::PreamblePatcher::Unpatch(IncrementNumber,
+ HookIncrementNumber,
+ original_function));
+ original_function = NULL;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool AutoTestingHookTest() {
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+ // Inner scope, so we can test what happens when the AutoTestingHook
+ // goes out of scope
+ {
+ AutoTestingHook hook = MakeTestingHook(IncrementNumber,
+ AutoHookIncrementNumber);
+ (void) hook;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+ }
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool AutoTestingHookInContainerTest() {
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+ // Inner scope, so we can test what happens when the AutoTestingHook
+ // goes out of scope
+ {
+ AutoTestingHookHolder hook(MakeTestingHookHolder(IncrementNumber,
+ AutoHookIncrementNumber));
+ (void) hook;
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+ }
+ SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+ return true;
+}
+
+bool TestPreambleAllocation() {
+ __int64 diff = 0;
+ void* p1 = reinterpret_cast<void*>(0x110000000);
+ void* p2 = reinterpret_cast<void*>(0x810000000);
+ unsigned char* b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+ SIDESTEP_EXPECT_TRUE(b1 != NULL);
+ diff = reinterpret_cast<__int64>(p1) - reinterpret_cast<__int64>(b1);
+ // Ensure blocks are within 2GB
+ SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+ unsigned char* b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+ SIDESTEP_EXPECT_TRUE(b2 != NULL);
+ diff = reinterpret_cast<__int64>(p2) - reinterpret_cast<__int64>(b2);
+ SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+
+ // Ensure we're reusing free blocks
+ unsigned char* b3 = b1;
+ unsigned char* b4 = b2;
+ PreamblePatcher::FreePreambleBlock(b1);
+ PreamblePatcher::FreePreambleBlock(b2);
+ b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+ SIDESTEP_EXPECT_TRUE(b1 == b3);
+ b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+ SIDESTEP_EXPECT_TRUE(b2 == b4);
+ PreamblePatcher::FreePreambleBlock(b1);
+ PreamblePatcher::FreePreambleBlock(b2);
+
+ return true;
+}
+
+bool UnitTests() {
+ return TestPatchWithPreambleNearRelativeCall() &&
+ TestPatchWithPreambleAbsoluteJump() &&
+ TestPatchWithPreambleNearRelativeCondJump() &&
+ TestPatchWithPreambleShortCondJump() &&
+ TestDisassembler() && TestPatchWithLongJump() &&
+ TestPatchUsingDynamicStub() && PatchThenUnpatch() &&
+ AutoTestingHookTest() && AutoTestingHookInContainerTest() &&
+ TestPreambleAllocation();
+}
+
+}; // namespace sidestep
+
+int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
+ if (size == 0) // not even room for a \0?
+ return -1; // not what C99 says to do, but what windows does
+ str[size-1] = '\0';
+ return _vsnprintf(str, size-1, format, ap);
+}
+
+int _tmain(int argc, _TCHAR* argv[])
+{
+ bool ret = sidestep::UnitTests();
+ printf("%s\n", ret ? "PASS" : "FAIL");
+ return ret ? 0 : -1;
+}
+
+#pragma optimize("", on)
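Aside: every displacement the patcher writes -- the jump into the stub, the
jump back from the stub, and the fix-ups exercised by the tests above --
follows the same x86 rule: a rel32 operand is measured from the end of the
instruction that contains it.  A minimal sketch of that arithmetic
(WriteJmp32Rel is a hypothetical helper for illustration, not part of this
patch):

    #include <cstdint>
    #include <cstring>

    // Writes "jmp dest" at src as E9 + rel32 (5 bytes).  The caller must
    // guarantee that dest lies within +/-2GB of src + 5, which is the same
    // range check the patcher performs before emitting such a jump.
    inline void WriteJmp32Rel(unsigned char* src, unsigned char* dest) {
      std::int64_t rel = dest - (src + 5);  // measured from next instruction
      std::int32_t rel32 = static_cast<std::int32_t>(rel);
      src[0] = 0xE9;                        // ASM_JMP32REL
      std::memcpy(src + 1, &rel32, 4);
    }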
diff --git a/src/windows/preamble_patcher_with_stub.cc b/src/windows/preamble_patcher_with_stub.cc
index 4eb391d..b0dc393 100644
--- a/src/windows/preamble_patcher_with_stub.cc
+++ b/src/windows/preamble_patcher_with_stub.cc
@@ -29,6 +29,7 @@
  *
  * ---
  * Author: Joi Sigurdsson
+ * Author: Scott Francis
  *
  * Implementation of PreamblePatcher
  */
@@ -40,12 +41,20 @@
 // Definitions of assembly statements we need
 #define ASM_JMP32REL 0xE9
 #define ASM_INT3 0xCC
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_REXW 0x48
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
+#define ASM_PUSH 0x68
+#define ASM_RET 0xC3
 
 namespace sidestep {
 
 SideStepError PreamblePatcher::RawPatchWithStub(
     void* target_function,
-    void *replacement_function,
+    void* replacement_function,
     unsigned char* preamble_stub,
     unsigned long stub_size,
     unsigned long* bytes_needed) {
@@ -75,23 +84,52 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // doing it atomically does not help if one of the other threads happens
   // to have its eip in the middle of the bytes you change while you change
   // them.
+  unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
+  unsigned int required_trampoline_bytes = 0;
+  const unsigned int kRequiredStubJumpBytes = 5;
+  const unsigned int kRequiredTargetPatchBytes = 5;
 
-  // First, deal with a special case that we see with functions that
-  // point into an IAT table (including functions linked statically
-  // into the application): these functions already start with
-  // ASM_JMP32REL.  For instance, malloc() might be implemented as a
-  // JMP to __malloc().  In that case, we replace the destination of
-  // the JMP (__malloc), rather than the JMP itself (malloc).  This
-  // way we get the correct behavior no matter how malloc gets called.
-  void *new_target = ResolveTarget(target_function);
-  if (new_target != target_function) {   // we're in the IAT case
-    // I'd like to just say "target = new_target", but I can't,
-    // because the new target will need to have its protections set.
-    return RawPatchWithStubAndProtections(new_target, replacement_function,
-                                          preamble_stub, stub_size,
-                                          bytes_needed);
+  // Initialize the stub with INT3's just in case.
+  if (stub_size) {
+    memset(preamble_stub, 0xcc, stub_size);
+  }
+  if (kIs64BitBinary) {
+    // In 64-bit mode JMP instructions are always relative to RIP.  If the
+    // replacement - target offset is > 2GB, we can't JMP to the replacement
+    // function.  In this case, we're going to use a trampoline - that is,
+    // we're going to do a relative jump to a small chunk of code in the stub
+    // that will then do the absolute jump to the replacement function.  By
+    // doing this, we only need to patch 5 bytes in the target function, as
+    // opposed to patching 12 bytes if we were to do an absolute jump.
+    //
+    // Note that the first byte of the trampoline is a NOP instruction.  This
+    // is used as a trampoline signature that will be detected when unpatching
+    // the function.
+    //
+    // jmp <trampoline>
+    //
+    // trampoline:
+    //   nop
+    //   mov rax, <replacement_function>
+    //   jmp rax
+    //
+    __int64 replacement_target_offset = reinterpret_cast<__int64>(
+        replacement_function) - reinterpret_cast<__int64>(target) - 5;
+    if (replacement_target_offset > INT_MAX
+        || replacement_target_offset < INT_MIN) {
+      // The stub needs to be within 2GB of the target for the trampoline to
+      // work!
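+      // (Trampoline byte layout: 90 | 48 B8 imm64 | FF E0, i.e. NOP,
+      // MOV RAX, imm64, JMP RAX -- 1 + 10 + 2 = 13 bytes.)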
+      __int64 trampoline_offset = reinterpret_cast<__int64>(preamble_stub)
+          - reinterpret_cast<__int64>(target) - 5;
+      if (trampoline_offset > INT_MAX || trampoline_offset < INT_MIN) {
+        // We're screwed.
+        SIDESTEP_ASSERT(false
+            && "Preamble stub is too far from target to patch.");
+        return SIDESTEP_UNEXPECTED;
+      }
+      required_trampoline_bytes = 13;
+    }
   }
 
-  unsigned char* target = reinterpret_cast<unsigned char*>(new_target);
 
   // Let's disassemble the preamble of the target function to see if we can
   // patch, and to see how much of the preamble we need to take.  We need 5
@@ -99,42 +137,76 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // instructions to get 5 bytes.
   MiniDisassembler disassembler;
   unsigned int preamble_bytes = 0;
-  while (preamble_bytes < 5) {
+  unsigned int stub_bytes = 0;
+  while (preamble_bytes < kRequiredTargetPatchBytes) {
+    unsigned int cur_bytes = 0;
     InstructionType instruction_type =
-        disassembler.Disassemble(target + preamble_bytes, preamble_bytes);
+        disassembler.Disassemble(target + preamble_bytes, cur_bytes);
     if (IT_JUMP == instruction_type) {
-      SIDESTEP_ASSERT(false &&
-                      "Unable to patch because there is a jump instruction "
-                      "in the first 5 bytes.");
-      return SIDESTEP_JUMP_INSTRUCTION;
+      unsigned int jump_bytes = 0;
+      SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION;
+      if (IsShortConditionalJump(target + preamble_bytes, cur_bytes)) {
+        jump_ret = PatchShortConditionalJump(target + preamble_bytes,
+                                             cur_bytes,
+                                             preamble_stub + stub_bytes,
+                                             &jump_bytes,
+                                             stub_size - stub_bytes);
+      } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
+                 IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
+                 IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
+                 IsNearRelativeCall(target + preamble_bytes, cur_bytes)) {
+        jump_ret = PatchNearJumpOrCall(target + preamble_bytes, cur_bytes,
+                                       preamble_stub + stub_bytes, &jump_bytes,
+                                       stub_size - stub_bytes);
+      }
+      if (jump_ret != SIDESTEP_SUCCESS) {
+        SIDESTEP_ASSERT(false &&
+                        "Unable to patch because there is an unhandled branch "
+                        "instruction in the initial preamble bytes.");
+        return SIDESTEP_JUMP_INSTRUCTION;
+      }
+      stub_bytes += jump_bytes;
     } else if (IT_RETURN == instruction_type) {
       SIDESTEP_ASSERT(false &&
                       "Unable to patch because function is too short");
       return SIDESTEP_FUNCTION_TOO_SMALL;
-    } else if (IT_GENERIC != instruction_type) {
+    } else if (IT_GENERIC == instruction_type) {
+      if (IsMovWithDisplacement(target + preamble_bytes, cur_bytes)) {
+        unsigned int mov_bytes = 0;
+        if (PatchMovWithDisplacement(target + preamble_bytes, cur_bytes,
+                                     preamble_stub + stub_bytes, &mov_bytes,
+                                     stub_size - stub_bytes)
+            != SIDESTEP_SUCCESS) {
+          return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+        }
+        stub_bytes += mov_bytes;
+      } else {
+        memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes),
+               reinterpret_cast<void*>(target + preamble_bytes), cur_bytes);
+        stub_bytes += cur_bytes;
+      }
+    } else {
       SIDESTEP_ASSERT(false &&
                       "Disassembler encountered unsupported instruction "
                       "(either unused or unknown)");
       return SIDESTEP_UNSUPPORTED_INSTRUCTION;
     }
+    preamble_bytes += cur_bytes;
   }
 
   if (NULL != bytes_needed)
-    *bytes_needed = preamble_bytes + 5;
+    *bytes_needed = stub_bytes + kRequiredStubJumpBytes
+        + required_trampoline_bytes;
 
   // Inv: cbPreamble is the number of bytes (at least 5) that we need to take
   // from the preamble to have whole instructions that are 5 bytes or more
-  // in size total.  The size of the stub required is cbPreamble + size of
-  // jmp (5)
-  if (preamble_bytes + 5 > stub_size) {
+  // in size total.  The size of the stub required is cbPreamble +
+  // kRequiredStubJumpBytes (5) + required_trampoline_bytes (0 or 13)
+  if (stub_bytes + kRequiredStubJumpBytes + required_trampoline_bytes
+      > stub_size) {
     SIDESTEP_ASSERT(false);
     return SIDESTEP_INSUFFICIENT_BUFFER;
   }
 
-  // First, copy the preamble that we will overwrite.
-  memcpy(reinterpret_cast<void*>(preamble_stub),
-         reinterpret_cast<void*>(target), preamble_bytes);
-
   // Now, make a jmp instruction to the rest of the target function (minus the
   // preamble bytes we moved into the stub) and copy it into our preamble-stub.
   // find address to jump to, relative to next address after jmp instruction
@@ -144,16 +216,32 @@ SideStepError PreamblePatcher::RawPatchWithStub(
 #endif
   int relative_offset_to_target_rest
       = ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) -
-         (preamble_stub + preamble_bytes + 5));
+         (preamble_stub + stub_bytes + kRequiredStubJumpBytes));
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
   // jmp (Jump near, relative, displacement relative to next instruction)
-  preamble_stub[preamble_bytes] = ASM_JMP32REL;
+  preamble_stub[stub_bytes] = ASM_JMP32REL;
   // copy the address
-  memcpy(reinterpret_cast<void*>(preamble_stub + preamble_bytes + 1),
+  memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes + 1),
          reinterpret_cast<void*>(&relative_offset_to_target_rest), 4);
 
+  if (kIs64BitBinary && required_trampoline_bytes != 0) {
+    // Construct the trampoline
+    unsigned int trampoline_pos = stub_bytes + kRequiredStubJumpBytes;
+    preamble_stub[trampoline_pos] = ASM_NOP;
+    preamble_stub[trampoline_pos + 1] = ASM_REXW;
+    preamble_stub[trampoline_pos + 2] = ASM_MOVRAX_IMM;
+    memcpy(reinterpret_cast<void*>(preamble_stub + trampoline_pos + 3),
+           reinterpret_cast<void*>(&replacement_function),
+           sizeof(void *));
+    preamble_stub[trampoline_pos + 11] = ASM_JMP;
+    preamble_stub[trampoline_pos + 12] = ASM_JMP_RAX;
+
+    // Now update replacement_function to point to the trampoline
+    replacement_function = preamble_stub + trampoline_pos;
+  }
+
   // Inv: preamble_stub points to assembly code that will execute the
   // original function by first executing the first cbPreamble bytes of the
   // preamble, then jumping to the rest of the function.
@@ -177,6 +265,7 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // complete the jmp instruction
   memcpy(reinterpret_cast<void*>(target + 1),
          reinterpret_cast<void*>(&offset_to_replacement_function), 4);
+
   // Set any remaining bytes that were moved to the preamble-stub to INT3 so
   // as not to cause confusion (otherwise you might see some strange
   // instructions if you look at the disassembly, or even invalid
   // instructions).  Also, by doing this, we will break into the debugger if
   // some code calls into this portion of the code.  If this happens, it
   // means that this function cannot be patched using this patcher without
   // further thought.
-  if (preamble_bytes > 5) {
-    memset(reinterpret_cast<void*>(target + 5), ASM_INT3, preamble_bytes - 5);
+  if (preamble_bytes > kRequiredTargetPatchBytes) {
+    memset(reinterpret_cast<void*>(target + kRequiredTargetPatchBytes),
+           ASM_INT3, preamble_bytes - kRequiredTargetPatchBytes);
   }
 
   // Inv: The memory pointed to by target_function now points to a relative
   // jump instruction that jumps over to the preamble_stub.  The preamble
   // stub contains the first stub_size bytes of the original target
   // function's preamble code, followed by a relative jump back to the next
   // instruction after the first cbPreamble bytes.
-
+  //
+  // In 64-bit mode the memory pointed to by target_function *may* point to a
+  // relative jump instruction that jumps to a trampoline which will then
+  // perform an absolute jump to the replacement function.  The preamble stub
+  // still contains the original target function's preamble code, followed by a
+  // jump back to the instructions after the first preamble bytes.
+  //
   return SIDESTEP_SUCCESS;
 }
 
diff --git a/src/windows/shortproc.asm b/src/windows/shortproc.asm
new file mode 100644
index 0000000..757fd43
--- /dev/null
+++ b/src/windows/shortproc.asm
@@ -0,0 +1,167 @@
+; Copyright (c) 2011, Google Inc.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+;     * Redistributions of source code must retain the above copyright
+;       notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above
+;       copyright notice, this list of conditions and the following
+;       disclaimer in the documentation and/or other materials provided
+;       with the distribution.
+;     * Neither the name of Google Inc. nor the names of its
+;       contributors may be used to endorse or promote products derived
+;       from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+; ---
+; Author: Scott Francis
+;
+; Unit tests for PreamblePatcher
+
+.CODE
+
+TooShortFunction PROC
+ ret
+TooShortFunction ENDP
+
+JumpShortCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+jumpspot:
+ nop
+ nop
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpShortCondFunction ENDP
+
+JumpNearCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpNearCondFunction ENDP
+
+JumpAbsoluteFunction PROC
+ test cl, 1
+ jmp jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpAbsoluteFunction ENDP
+
+CallNearRelativeFunction PROC
+ test cl, 1
+ call TooShortFunction
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ nop
+ nop
+ nop
+ ret
+CallNearRelativeFunction ENDP
+
+END
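Note: shortproc.asm uses 64-bit registers with bare PROC/ENDP directives, so
it is x64 MASM source.  Presumably it is assembled by the 64-bit assembler
in the MSVC toolchain; a plausible manual invocation (assuming ml64.exe is
on the PATH) would be:

    ml64 /c /Fo shortproc.obj src\windows\shortproc.asm

The int 3 and mov rdx padding appears to serve one purpose: it makes each
body long enough that the 5-byte patch cannot consume the whole function,
so the branch or call at the top must be rewritten by the preamble patcher.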