summaryrefslogtreecommitdiff
path: root/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c')
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c69
1 files changed, 14 insertions, 55 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
index 48db58ffefd..8dd48462b51 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c
@@ -1,60 +1,19 @@
/* { dg-do assemble } */
-/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */
-#include <stdint.h>
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
-#define NUM_ELEMS(TYPE) (4 * (32 / sizeof (TYPE)))
-#define INVALID_INDEX(TYPE) ((TYPE) 107)
-#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE))
+#include "sve_mask_gather_load_1.c"
-/* TODO: This is a bit ugly for floating point types as it involves FP<>INT
- conversions, but I can't find another way of auto-vectorizing the code to
- make use of SVE gather instructions. */
-#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\
-void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\
- LOOKUPTYPE *__restrict lookup,\
- INDEXTYPE *__restrict index, INDEXTYPE n)\
-{\
- INDEXTYPE i;\
- for (i = 0; i < n; ++i)\
- {\
- LOOKUPTYPE x = index[i];\
- if (IS_VALID_INDEX (LOOKUPTYPE, x))\
- x = lookup[x];\
- out[i] = x;\
- }\
-}\
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int8_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int8_t)
-DEF_MASK_GATHER_LOAD (int32_t, int32_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int16_t)
-DEF_MASK_GATHER_LOAD (int64_t, int64_t, int32_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t)
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t)
-DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t)
-
-/* At present we only use predicate unpacks when the index type is
- half the size of the result type. */
-/* { dg-final { scan-assembler-times "\tpunpklo\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-/* { dg-final { scan-assembler-times "\tpunpkhi\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */
-
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */
-/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */
-
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 6 } } */
-/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 28 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */