diff options
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c')
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c new file mode 100644 index 00000000000..a834989091d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX8 uint8_t +#define INDEX16 uint16_t +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_strided_load_1.c" + +/* 8 and 16 bits are signed because the multiplication promotes to int. + Using uxtw for all 9 would be OK. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ +/* The 32-bit loop needs to honor the defined overflow in uint32_t, + so we vectorize the offset calculation. This means that the + 64-bit version needs two copies. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */ |