summaryrefslogtreecommitdiff
path: root/sysdeps/aarch64/tst-auditmod28.c
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2021-08-04 15:30:56 +0000
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2021-09-10 15:07:38 -0300
commitc8315ccd30fcecc1b93a9bc3f073010190a86e05 (patch)
tree5f8cc1af7a951206e25b72eeca7875561f024a4f /sysdeps/aarch64/tst-auditmod28.c
parent171fdd4bd4f337001db053721477add60d205ed8 (diff)
downloadglibc-c8315ccd30fcecc1b93a9bc3f073010190a86e05.tar.gz
elf: Add SVE support for aarch64 rtld-auditazanella/ld-audit-fixes
Lazy binding is now enabled when profiling or auditing is in use, even when STO_AARCH64_VARIANT_PCS is set. Also, to avoid incurring performance penalties on architectures without SVE, the PLT entrypoint is set to a newer one, _dl_runtime_profile_sve, which is used iff 'hwcap' has the HWCAP_SVE bit set. This should be a fair assumption since SVE has a defined set of registers for argument passing and return values. A new ABI with either different argument passing or different registers would require a different PLT entry, but I assume this would require another symbol flag anyway (or at least a different ELF mark to indicate so). The profile '_dl_runtime_profile_sve' entrypoint assumes the largest SVE register size possible (2048 bits) and thus requires a quite large stack (8976 bytes). I think it would be possible to make the stack requirement dynamic depending on the vector length, but it would make the PLT audit function much more complex. This change extends La_aarch64_vector with a long double pointer to a stack-allocated buffer that holds the SVE Z register, along with a pointer on La_aarch64_regs to hold the P registers. If 'lr_sve' is 0 in either La_aarch64_regs or La_aarch64_retval, the La_aarch64_vector contains the floating-point registers, which can be accessed directly (non-SVE hardware). Otherwise, 'La_aarch64_vector.z' points to a memory area that holds up to 'lr_sve' bytes for the Z registers, which can be loaded with the svld1 intrinsic, for instance (as tst-audit28.c does). The P registers follow the same logic, with each La_aarch64_regs.lr_sve_pregs pointing to an area of memory 'lr_sve/8' in size.
So, to access the FP register as a float you can use: static inline float regs_vec_to_float (const La_aarch64_regs *regs, int idx) { float r; if (regs->lr_sve == 0) r = regs->lr_vreg[idx].s; else memcpy (&r, &regs->lr_vreg[idx].z[0], sizeof (r)); return r; } This patch is not complete yet: tst-audit28 does not check whether the compiler supports SVE (we would need a configure check to disable it in that case), I still need to add a proper comment for the _dl_runtime_profile_sve stack layout, and the test needs to check for P register state clobbering. I also haven't checked the performance penalties of this approach, and maybe the way I am saving/restoring the SVE registers could be optimized. In any case, I checked on an SVE machine and at least the testcase works as expected without any regressions. I also did a sniff test on a non-SVE machine.
Diffstat (limited to 'sysdeps/aarch64/tst-auditmod28.c')
-rw-r--r--sysdeps/aarch64/tst-auditmod28.c193
1 files changed, 193 insertions, 0 deletions
diff --git a/sysdeps/aarch64/tst-auditmod28.c b/sysdeps/aarch64/tst-auditmod28.c
new file mode 100644
index 0000000000..53a2162bfb
--- /dev/null
+++ b/sysdeps/aarch64/tst-auditmod28.c
@@ -0,0 +1,193 @@
+/* Check DT_AUDIT for aarch64 specific ABI.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <assert.h>
+#include <link.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "tst-audit28mod.h"
+
+#define TEST_NAME "tst-audit28"
+#define TEST_FUNC "tst_audit28_func"
+
+#define AUDIT28_COOKIE 0
+
/* Audit interface version handshake: accept whatever interface version
   the dynamic linker proposes by echoing it back unchanged.  */
unsigned int
la_version (unsigned int version)
{
  return version;
}
+
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ const char *p = strrchr (map->l_name, '/');
+ const char *l_name = p == NULL ? map->l_name : p + 1;
+ uintptr_t ck = -1;
+ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0)
+ ck = AUDIT28_COOKIE;
+ *cookie = ck;
+ printf ("objopen: %ld, %s [%ld]\n", lmid, l_name, ck);
+ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
/* PLT-enter audit hook, called by the lazy-binding trampoline before the
   target function runs.  For the SVE-argument test symbol it checks that
   the Z argument registers recorded in REGS match the values generated by
   tst-audit28mod.h, then clobbers z0-z7 so the trampoline's register
   restore can be validated by the callee.  Returning sym->st_value lets
   the call proceed; a positive *framesizep enables the pltexit hook.  */
ElfW(Addr)
la_aarch64_gnu_pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
                         uintptr_t *defcook, La_aarch64_regs *regs,
                         unsigned int *flags, const char *symname,
                         long int *framesizep)
{
  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
          symname, (long int) sym->st_value, ndx, *flags);
  /* A nonzero lr_sve means each lr_vreg[i].z points to a spill buffer
     holding the full SVE Z register image instead of the plain FP
     register contents.  */
  printf (" regs->lr_sve=%d\n", regs->lr_sve);
  if (regs->lr_sve > 0)
    for (int i = 0; i < array_length (regs->lr_vreg); i++)
      printf (" inregs->lr_vreg[%d]=%p\n", i, regs->lr_vreg[i].z);

  if (strcmp (symname, TEST_FUNC "_sve_args") == 0)
    {
      /* Reload each incoming Z argument register from its spill buffer
         (one element type per register, matching the callee's prototype)
         and compare it against the expected vector from
         tst-audit28mod.h.  */
      svint8_t z0 = svld1_s8 (svptrue_b8 (),
                              (const int8_t *) regs->lr_vreg[0].z);
      svint16_t z1 = svld1_s16 (svptrue_b16 (),
                                (const int16_t *) regs->lr_vreg[1].z);
      svint32_t z2 = svld1_s32 (svptrue_b32 (),
                                (const int32_t *) regs->lr_vreg[2].z);
      svint64_t z3 = svld1_s64 (svptrue_b64 (),
                                (const int64_t *) regs->lr_vreg[3].z);
      svuint8_t z4 = svld1_u8 (svptrue_b8 (),
                               (const uint8_t *) regs->lr_vreg[4].z);
      svuint16_t z5 = svld1_u16 (svptrue_b16 (),
                                 (const uint16_t *) regs->lr_vreg[5].z);
      svuint32_t z6 = svld1_u32 (svptrue_b32 (),
                                 (const uint32_t *) regs->lr_vreg[6].z);
      svuint64_t z7 = svld1_u64 (svptrue_b64 (),
                                 (const uint64_t *) regs->lr_vreg[7].z);
      /* NOTE(review): svptest_any on an svcmpeq result asserts that at
         least one lane compares equal, not that all lanes do — presumably
         sufficient for this test's generated patterns; confirm.  */
      assert (svptest_any (svptrue_b8 (), svcmpeq_s8 (svptrue_b8 (),
                                                      z0, sve_args_z0 ())));
      assert (svptest_any (svptrue_b16 (), svcmpeq_s16 (svptrue_b16 (),
                                                        z1, sve_args_z1 ())));
      assert (svptest_any (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (),
                                                        z2, sve_args_z2 ())));
      assert (svptest_any (svptrue_b64 (), svcmpeq_s64 (svptrue_b64 (),
                                                        z3, sve_args_z3 ())));
      assert (svptest_any (svptrue_b8 (), svcmpeq_u8 (svptrue_b8 (),
                                                      z4, sve_args_z4 ())));
      assert (svptest_any (svptrue_b16 (), svcmpeq_u16 (svptrue_b16 (),
                                                        z5, sve_args_z5 ())));
      assert (svptest_any (svptrue_b32 (), svcmpeq_u32 (svptrue_b32 (),
                                                        z6, sve_args_z6 ())));
      assert (svptest_any (svptrue_b64 (), svcmpeq_u64 (svptrue_b64 (),
                                                        z7, sve_args_z7 ())));
    }
  else
    /* Only the SVE-argument test symbol is expected through this path.  */
    abort ();

  /* Clobber the z argument registers on exit; the PLT trampoline must
     restore the original values before entering the real function.  */
  uint8_t v = 0xff;
  asm volatile ("dup z0.b, %w0" : : "r" (v) : "z0");
  asm volatile ("dup z1.b, %w0" : : "r" (v) : "z1");
  asm volatile ("dup z2.b, %w0" : : "r" (v) : "z2");
  asm volatile ("dup z3.b, %w0" : : "r" (v) : "z3");
  asm volatile ("dup z4.b, %w0" : : "r" (v) : "z4");
  asm volatile ("dup z5.b, %w0" : : "r" (v) : "z5");
  asm volatile ("dup z6.b, %w0" : : "r" (v) : "z6");
  asm volatile ("dup z7.b, %w0" : : "r" (v) : "z7");

  /* Request the la_aarch64_gnu_pltexit callback with a nonzero frame.  */
  *framesizep = 1024;

  return sym->st_value;
}
+
/* PLT-exit audit hook, called after the target function returns (enabled
   by the positive frame size set in la_aarch64_gnu_pltenter).  Re-checks
   the recorded SVE argument registers in INREGS, checks the SVE return
   value in OUTREGS against the expected vector, then clobbers z0-z7 so
   the trampoline's restore of the real return value can be validated.  */
unsigned int
la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
                        uintptr_t *defcook,
                        const struct La_aarch64_regs *inregs,
                        struct La_aarch64_retval *outregs,
                        const char *symname)
{
  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n",
          symname, (long int) sym->st_value, ndx);
  /* Nonzero lr_sve/lrv_sve means the .z members point to spill buffers
     holding full SVE Z register images rather than plain FP registers.  */
  printf (" inregs->lr_sve=%d\n", inregs->lr_sve);
  if (inregs->lr_sve > 0)
    for (int i = 0; i < array_length (inregs->lr_vreg); i++)
      printf (" inregs->lr_vreg[%d]=%p\n", i, inregs->lr_vreg[i].z);
  printf (" outregs->lr_sve=%d\n", outregs->lrv_sve);
  if (outregs->lrv_sve > 0)
    for (int i = 0; i < array_length (outregs->lrv_vreg); i++)
      printf (" outregs->lr_vreg[%d]=%p\n", i, outregs->lrv_vreg[i].z);

  if (strcmp (symname, TEST_FUNC "_sve_args") == 0)
    {
      /* The incoming argument registers must still hold the caller's
         values, i.e. the pltenter clobber must have been undone by the
         trampoline before the callee ran.  */
      svint8_t z0 = svld1_s8 (svptrue_b8 (),
                              (const int8_t *) inregs->lr_vreg[0].z);
      svint16_t z1 = svld1_s16 (svptrue_b16 (),
                                (const int16_t *) inregs->lr_vreg[1].z);
      svint32_t z2 = svld1_s32 (svptrue_b32 (),
                                (const int32_t *) inregs->lr_vreg[2].z);
      svint64_t z3 = svld1_s64 (svptrue_b64 (),
                                (const int64_t *) inregs->lr_vreg[3].z);
      svuint8_t z4 = svld1_u8 (svptrue_b8 (),
                               (const uint8_t *) inregs->lr_vreg[4].z);
      svuint16_t z5 = svld1_u16 (svptrue_b16 (),
                                 (const uint16_t *) inregs->lr_vreg[5].z);
      svuint32_t z6 = svld1_u32 (svptrue_b32 (),
                                 (const uint32_t *) inregs->lr_vreg[6].z);
      svuint64_t z7 = svld1_u64 (svptrue_b64 (),
                                 (const uint64_t *) inregs->lr_vreg[7].z);
      assert (svptest_any (svptrue_b8 (), svcmpeq_s8 (svptrue_b8 (),
                                                      z0, sve_args_z0 ())));
      assert (svptest_any (svptrue_b16 (), svcmpeq_s16 (svptrue_b16 (),
                                                        z1, sve_args_z1 ())));
      assert (svptest_any (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (),
                                                        z2, sve_args_z2 ())));
      assert (svptest_any (svptrue_b64 (), svcmpeq_s64 (svptrue_b64 (),
                                                        z3, sve_args_z3 ())));
      assert (svptest_any (svptrue_b8 (), svcmpeq_u8 (svptrue_b8 (),
                                                      z4, sve_args_z4 ())));
      assert (svptest_any (svptrue_b16 (), svcmpeq_u16 (svptrue_b16 (),
                                                        z5, sve_args_z5 ())));
      assert (svptest_any (svptrue_b32 (), svcmpeq_u32 (svptrue_b32 (),
                                                        z6, sve_args_z6 ())));
      assert (svptest_any (svptrue_b64 (), svcmpeq_u64 (svptrue_b64 (),
                                                        z7, sve_args_z7 ())));

      /* Check the SVE return value recorded in lrv_vreg[0].  */
      svint8_t r0 = svld1_s8 (svptrue_b8 (),
                              (const int8_t *) outregs->lrv_vreg[0].z);
      assert (svptest_any (svptrue_b8 (), svcmpeq_s8 (svptrue_b8 (),
                                                      r0, sve_ret ())));
    }
  else
    /* Only the SVE-argument test symbol is expected through this path.  */
    abort ();

  /* Clobber the z registers on exit; the trampoline must restore the
     callee's real return value before control reaches the caller.  */
  uint8_t v = 0xff;
  asm volatile ("dup z0.b, %w0" : : "r" (v) : "z0");
  asm volatile ("dup z1.b, %w0" : : "r" (v) : "z1");
  asm volatile ("dup z2.b, %w0" : : "r" (v) : "z2");
  asm volatile ("dup z3.b, %w0" : : "r" (v) : "z3");
  asm volatile ("dup z4.b, %w0" : : "r" (v) : "z4");
  asm volatile ("dup z5.b, %w0" : : "r" (v) : "z5");
  asm volatile ("dup z6.b, %w0" : : "r" (v) : "z6");
  asm volatile ("dup z7.b, %w0" : : "r" (v) : "z7");

  return 0;
}