summaryrefslogtreecommitdiff
path: root/sysdeps/s390/s390-64/dl-trampoline.S
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.vnet.ibm.com>2016-03-31 17:37:16 +0200
committerStefan Liebler <stli@linux.vnet.ibm.com>2016-03-31 17:37:16 +0200
commit4603c51ef7989d7eb800cdd6f42aab206f891077 (patch)
treec15dc0e82a1b8b16eac6fdef4fef8c70450921eb /sysdeps/s390/s390-64/dl-trampoline.S
parente91bd7465816f474617dcb4bbfe72f3594c5783c (diff)
downloadglibc-4603c51ef7989d7eb800cdd6f42aab206f891077.tar.gz
S390: Save and restore fprs/vrs while resolving symbols.
On s390, no fpr/vrs were saved while resolving a symbol via _dl_runtime_resolve/_dl_runtime_profile. According to the abi, the fpr-arguments are defined as call clobbered. In leaf-functions, gcc 4.9 and newer can use fprs for saving/restoring gprs instead of saving them to the stack. If gcc do this in one of the resolver-functions, then the floating point arguments of a library-function are invalid for the first library-function-call. Thus, this patch saves/restores the fprs around the resolving code. The same could occur for vector registers. Furthermore an ifunc-resolver could also clobber the vector/floating point argument registers. Thus this patch provides the further variants _dl_runtime_resolve_vx/ _dl_runtime_profile_vx, which are used if the kernel claims, that we run on a machine with vector registers. Furthermore, if _dl_runtime_profile calls _dl_call_pltexit, the pointers to inregs-/outregs-structs were setup invalid. Now they point to the correct location in the stack-frame. Before branching back to the caller, the return values are now restored instead of containing the return values of the _dl_call_pltexit() call. On s390-32, an endless loop occurs if _dl_call_pltexit() should be called. Now, this code-path branches to this function instead of just after the preceding basr-instruction. ChangeLog: * sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice to create a non-vector/vector version for _dl_runtime_resolve and _dl_runtime_profile. Move implementation to ... * sysdeps/s390/s390-32/dl-trampoline.h: ... here. (_dl_runtime_resolve) Save and restore fpr/vrs. (_dl_runtime_profile) Save and restore vrs and fix some issues if _dl_call_pltexit is called. * sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup): Choose the correct resolver function if running on a machine with vx. * sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice to create a non-vector/vector version for _dl_runtime_resolve and _dl_runtime_profile. Move implementation to ... * sysdeps/s390/s390-64/dl-trampoline.h: ... here. (_dl_runtime_resolve) Save and restore fpr/vrs. (_dl_runtime_profile) Save and restore vrs and fix some issues * sysdeps/s390/s390-64/dl-machine.h: (elf_machine_runtime_setup): Choose the correct resolver function if running on a machine with vx.
Diffstat (limited to 'sysdeps/s390/s390-64/dl-trampoline.S')
-rw-r--r--sysdeps/s390/s390-64/dl-trampoline.S130
1 files changed, 11 insertions, 119 deletions
diff --git a/sysdeps/s390/s390-64/dl-trampoline.S b/sysdeps/s390/s390-64/dl-trampoline.S
index 6919ed0138..1b0c9e2680 100644
--- a/sysdeps/s390/s390-64/dl-trampoline.S
+++ b/sysdeps/s390/s390-64/dl-trampoline.S
@@ -16,126 +16,18 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile
- * with the following linkage:
- * r2 - r6 : parameter registers
- * f0, f2, f4, f6 : floating point parameter registers
- * 48(r15), 56(r15) : PLT arguments PLT1, PLT2
- * 160(r15) : additional stack parameters
- * The normal clobber rules for function calls apply:
- * r0 - r5 : call clobbered
- * r6 - r13 : call saved
- * r14 : return address (call clobbered)
- * r15 : stack pointer (call saved)
- * f1, f3, f5, f7 : call saved
- * f0 - f3, f5, f7 - f15 : call clobbered
- */
-
#include <sysdep.h>
.text
- .globl _dl_runtime_resolve
- .type _dl_runtime_resolve, @function
- cfi_startproc
- .align 16
-_dl_runtime_resolve:
- stmg %r2,%r5,64(15) # save call-clobbered argument registers
- stg %r14,96(15)
- cfi_offset (r14, -64)
- lgr %r0,%r15
- aghi %r15,-160 # create stack frame
- cfi_adjust_cfa_offset (160)
- stg %r0,0(%r15) # write backchain
- lmg %r2,%r3,208(%r15)# load args saved by PLT
- brasl %r14,_dl_fixup # call fixup
- lgr %r1,%r2 # function addr returned in r2
- aghi %r15,160 # remove stack frame
- cfi_adjust_cfa_offset (-160)
- lg %r14,96(15) # restore registers
- lmg %r2,%r5,64(15)
- br %r1
- cfi_endproc
- .size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-#ifndef PROF
- .globl _dl_runtime_profile
- .type _dl_runtime_profile, @function
- cfi_startproc
- .align 16
-_dl_runtime_profile:
- stmg %r2,%r6,64(%r15) # save call-clobbered arg regs
- std %f0,104(%r15) # + r6 needed as arg for
- std %f2,112(%r15) # _dl_profile_fixup
- std %f4,120(%r15)
- std %f6,128(%r15)
- stg %r12,24(%r15) # r12 is used as backup of r15
- stg %r14,32(%r15)
- cfi_offset (r6, -96)
- cfi_offset (f0, -56)
- cfi_offset (f2, -48)
- cfi_offset (f4, -40)
- cfi_offset (f6, -32)
- cfi_offset (r12, -136)
- cfi_offset (r14, -128)
- lgr %r12,%r15 # backup stack pointer
- cfi_def_cfa_register (12)
- aghi %r15,-160 # create stack frame
- stg %r12,0(%r15) # save backchain
- lmg %r2,%r3,48(%r12) # load arguments saved by PLT
- lgr %r4,%r14 # return address as third parameter
- la %r5,64(%r12) # pointer to struct La_s390_32_regs
- la %r6,40(%r12) # long int * framesize
- brasl %r14,_dl_profile_fixup # call resolver
- lgr %r1,%r2 # function addr returned in r2
- lg %r0,40(%r12) # load framesize
- ltgr %r0,%r0
- jnm 1f
-
- lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call
- ld %f0,104(%r12) # so we can do a tail call without
- ld %f2,112(%r12) # copying the arg overflow area
- ld %f4,120(%r12)
- ld %f6,128(%r12)
-
- lgr %r15,%r12 # remove stack frame
- cfi_def_cfa_register (15)
- lg %r14,32(%r15) # restore registers
- lg %r12,24(%r15)
- br %r1 # tail-call to resolved function
-
- cfi_def_cfa_register (12)
-1: jz 4f # framesize == 0 ?
- aghi %r0,7 # align framesize to 8
- nill %r0,0xfff8
- slgr %r15,%r0 # make room for framesize bytes
- stg %r12,0(%r15)
- la %r2,160(%r15)
- la %r3,160(%r12)
- srlg %r0,%r0,3
-3: mvc 0(8,%r2),0(%r3) # copy additional parameters
- la %r2,8(%r2)
- la %r3,8(%r3)
- brctg %r0,3b
-4: lmg %r2,%r6,64(%r12) # load register parameters
- ld %f0,104(%r12) # restore call-clobbered arg regs
- ld %f2,112(%r12)
- ld %f4,120(%r12)
- ld %f6,128(%r12)
- basr %r14,%r1 # call resolved function
- stg %r2,136(%r12)
- std %f0,144(%r12)
- lmg %r2,%r3,48(%r12) # load arguments saved by PLT
- la %r4,32(%r12) # pointer to struct La_s390_32_regs
- la %r5,72(%r12) # pointer to struct La_s390_32_retval
- brasl %r14,_dl_call_pltexit
-
- lgr %r15,%r12 # remove stack frame
- cfi_def_cfa_register (15)
- lg %r14,32(%r15) # restore registers
- lg %r12,24(%r15)
- br %r14
-
- cfi_endproc
- .size _dl_runtime_profile, .-_dl_runtime_profile
+/* Create variant of _dl_runtime_resolve/profile for machines before z13.
+ No vector registers are saved/restored. */
+#include <dl-trampoline.h>
+
+#if defined HAVE_S390_VX_ASM_SUPPORT
+/* Create variant of _dl_runtime_resolve/profile for z13 and newer.
+ The vector registers are saved/restored, too.*/
+# define _dl_runtime_resolve _dl_runtime_resolve_vx
+# define _dl_runtime_profile _dl_runtime_profile_vx
+# define RESTORE_VRS
+# include <dl-trampoline.h>
#endif