diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-20 16:02:55 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-20 16:02:55 +0000 |
commit | d58952aefb03632bbb5b441d5c0bd330711f0af1 (patch) | |
tree | d046e56bfbd6a40106ae6ab96fafc954f1dfc955 /gcc/testsuite/gcc.target/aarch64 | |
parent | 648f8fc59b2cc39abd24f4c22388b346cdebcc31 (diff) | |
parent | 50221fae802a10fafe95e61d40504a58da33e98f (diff) | |
download | gcc-linaro-dev/sve.tar.gz |
Merge trunk into sve (linaro-dev/sve)
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64')
272 files changed, 3993 insertions, 4414 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c new file mode 100644 index 00000000000..8151387600f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/bsl-idiom.c @@ -0,0 +1,88 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-rtl-combine --save-temps" } */ + +/* Test that we don't generate BSL when in DImode with values in integer + registers, and do generate it where we have values in floating-point + registers. This is useful, as it allows us to avoid register moves + in the general case. + + We want: + eor x0, x0, x1 + and x0, x0, x2 + eor x0, x0, x1 + ret + + Rather than: + fmov d2, x0 + fmov d0, x2 + fmov d1, x1 + bsl v0.8b, v2.8b, v1.8b + fmov x0, d0 + ret */ + +extern void abort (void); + +unsigned long long __attribute__ ((noinline)) +foo (unsigned long long a, unsigned long long b, unsigned long long c) +{ + return ((a ^ b) & c) ^ b; +} + +unsigned long long __attribute__ ((noinline)) +foo2 (unsigned long long a, unsigned long long b, unsigned long long c) +{ + return ((a ^ b) & c) ^ a; +} + +#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \ + : "=w"(V1) \ + : "w"(V1) \ + : /* No clobbers */); + +unsigned long long __attribute__ ((noinline)) +bar (unsigned long long a, unsigned long long b, unsigned long long c) +{ + force_simd (a); + force_simd (b); + force_simd (c); + c = ((a ^ b) & c) ^ b; + force_simd (c); + return c; +} + +unsigned long long __attribute__ ((noinline)) +bar2 (unsigned long long a, unsigned long long b, unsigned long long c) +{ + force_simd (a); + force_simd (b); + force_simd (c); + c = ((a ^ b) & c) ^ a; + force_simd (c); + return c; +} + +int +main (int argc, char** argv) +{ + unsigned long long a = 0x0123456789abcdefULL; + unsigned long long b = 0xfedcba9876543210ULL; + unsigned long long c = 0xaabbccddeeff7777ULL; + if (foo (a, b, c) != bar (a, b, c)) + abort (); + if (foo2 (a, b, c) != bar2 (a, b, c)) + abort (); + return 0; +} + +/* 2 BSL, 6 FMOV (to 
floating-point registers), and 2 FMOV (to general +purpose registers) for the "bar" tests, which should still use BSL. */ +/* { dg-final { scan-assembler-times "bsl\tv\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "fmov\td\[0-9\]" 6 } } */ +/* { dg-final { scan-assembler-times "fmov\tx\[0-9\]" 2 } } */ + +/* { dg-final { scan-assembler-not "bif\tv\[0-9\]" } } */ +/* { dg-final { scan-assembler-not "bit\tv\[0-9\]" } } */ + +/* We always match the idiom during combine. */ +/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_internal" 2 "combine" } } */ +/* { dg-final { scan-rtl-dump-times "aarch64_simd_bsldi_alt" 2 "combine" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c new file mode 100644 index 00000000000..d87f3290828 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/construct_lane_zero_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +v2di +construct_lanedi (long long *y) +{ + v2di x = +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + { 0, y[0] } +#else + { y[0], 0 } +#endif + ; + return x; +} + +v2df +construct_lanedf (double *y) +{ + v2df x = +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + { 0.0, y[0] } +#else + { y[0], 0.0 } +#endif + ; + return x; +} + +/* Check that creating V2DI and V2DF vectors from a lane with a zero + makes use of the D-reg LDR rather than doing explicit lane inserts. 
*/ + +/* { dg-final { scan-assembler-times "ldr\td\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-not "ins\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c new file mode 100644 index 00000000000..0ec7109c738 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/copysign-bsl.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Test that we can generate DImode BSL when we are using + copysign. */ + +double +foo (double a, double b) +{ + return __builtin_copysign (a, b); +} + +/* { dg-final { scan-assembler "b\(sl|it|if\)\tv\[0-9\]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c index cce88155aca..ae5b3797021 100644 --- a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c +++ b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c @@ -3,7 +3,7 @@ /* { dg-options "-O0 -gdwarf-2" } */ /* { dg-final { scan-assembler ".cfi_restore 30" } } */ /* { dg-final { scan-assembler ".cfi_restore 29" } } */ -/* { dg-final { scan-assembler ".cfi_def_cfa 31, 0" } } */ +/* { dg-final { scan-assembler ".cfi_def_cfa_offset 0" } } */ /* { dg-final { scan-assembler "ret" } } */ int bar (unsigned int); diff --git a/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c new file mode 100644 index 00000000000..3c31b340154 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +v2di +construct_lanedi (long long *y) +{ + v2di x = { y[0], y[1] }; + return x; +} + +v2df +construct_lanedf (double *y) +{ + v2df x = { y[0], y[1] }; + return x; +} + +/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF + values from consecutive memory into a 2-element vector 
by using + a Q-reg LDR. */ + +/* { dg-final { scan-assembler-times "ldr\tq\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-not "ins\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c new file mode 100644 index 00000000000..6810db3c54d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +void +construct_lane_1 (double *y, v2df *z) +{ + double y0 = y[0] + 1; + double y1 = y[1] + 2; + v2df x = {y0, y1}; + z[2] = x; +} + +void +construct_lane_2 (long long *y, v2di *z) +{ + long long y0 = y[0] + 1; + long long y1 = y[1] + 2; + v2di x = {y0, y1}; + z[2] = x; +} + +/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF + values from consecutive memory into a 2-element vector by using + a Q-reg LDR. 
*/ + +/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-not "ins\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c index b22828d621b..c3bf2f326d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c @@ -36,7 +36,7 @@ LOOP (double) /* { dg-final { scan-assembler-times {\tstr\td[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 4 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 4 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c index a176d9ce251..4651c70afda 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c @@ -1,11 +1,11 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define N 32 /* Simple condition reduction. */ -int +int __attribute__ ((noinline, noclone)) condition_reduction (int *a, int min_v) { int last = 66; /* High start value. 
*/ @@ -17,6 +17,4 @@ condition_reduction (int *a, int min_v) return last; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c index 8e6444e4239..0dcba03b61c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c @@ -1,24 +1,22 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_1.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { int a[N] = { - 11, -12, 13, 14, 15, 16, 17, 18, 19, 20, - 1, 2, -3, 4, 5, 6, 7, -8, 9, 10, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32 + 11, -12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, -3, 4, 5, 6, 7, -8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 }; int ret = condition_reduction (a, 1); if (ret != 17) - abort (); + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c index dcae41f5425..381cbd17577 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c @@ -1,15 +1,17 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> #if !defined(TYPE) -#define TYPE unsigned int +#define TYPE uint32_t #endif #define N 254 /* Non-simple condition reduction. 
*/ -TYPE +TYPE __attribute__ ((noinline, noclone)) condition_reduction (TYPE *a, TYPE min_v) { TYPE last = 65; @@ -21,7 +23,4 @@ condition_reduction (TYPE *a, TYPE min_v) return last; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c index 0503ba36c3d..0d5187ba3ae 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c @@ -1,25 +1,23 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_2.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { unsigned int a[N] = { - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32 + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 }; - __builtin_memset (a+32, 43, (N-32)*sizeof (int)); + __builtin_memset (a + 32, 43, (N - 32) * sizeof (int)); unsigned int ret = condition_reduction (a, 16); if (ret != 10) - abort (); + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c index 1061194a08e..90a3b938593 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c @@ -1,11 +1,8 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize 
-fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE unsigned char +#define TYPE uint8_t #include "sve_clastb_2.c" -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c index 90c3e4a0cf3..f90fbfc5e9b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c @@ -1,25 +1,23 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_3.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { unsigned char a[N] = { - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32 + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 }; - __builtin_memset (a+32, 43, N-32); + __builtin_memset (a + 32, 43, N - 32); unsigned char ret = condition_reduction (a, 16); if (ret != 10) - abort (); + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c index 698d958693a..dc01b21c273 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c @@ -1,11 +1,8 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 
-ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE short +#define TYPE int16_t #include "sve_clastb_2.c" -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c index d0337ab300d..e17199f3672 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c @@ -5,7 +5,7 @@ extern void abort (void) __attribute__ ((noreturn)); -int +int __attribute__ ((optimize (1))) main (void) { short a[N] = { diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c index 655f95f410a..aef2a80c68f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c @@ -1,11 +1,8 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE long +#define TYPE uint64_t #include "sve_clastb_2.c" -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 
2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c index 573787233d8..e251db0bb76 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c @@ -1,25 +1,23 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_5.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { long a[N] = { - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32 + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 }; - __builtin_memset (a+32, 43, (N-32)*sizeof (long)); + __builtin_memset (a + 32, 43, (N - 32) * sizeof (long)); long ret = condition_reduction (a, 16); if (ret != 10) - abort (); + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c index bf1bc1a346a..93fec6396a2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c @@ -1,5 +1,5 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define N 32 @@ -9,7 +9,7 @@ /* Non-integer data types. 
*/ -TYPE +TYPE __attribute__ ((noinline, noclone)) condition_reduction (TYPE *a, TYPE min_v) { TYPE last = 0; @@ -21,8 +21,4 @@ condition_reduction (TYPE *a, TYPE min_v) return last; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */ - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c index 4c760daba89..c204ed4c4f0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c @@ -1,24 +1,22 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_6.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { float a[N] = { - 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, - 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, - 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, - 31.111, 32.322 + 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, + 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, + 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, + 31.111, 32.322 }; float ret = condition_reduction (a, 16.7); - if (ret != (float)10.6) - abort (); + if (ret != (float) 10.6) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c index 12e53b75e8a..d232a87e41d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c @@ -1,11 +1,7 @@ /* { dg-do assemble } */ -/* { 
dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE double #include "sve_clastb_6.c" -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ -/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ /* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */ - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c index d0001a923e8..2f87a4766e0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c @@ -1,24 +1,22 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_clastb_7.c" -extern void abort (void) __attribute__ ((noreturn)); - -int +int __attribute__ ((optimize (1))) main (void) { double a[N] = { - 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, - 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, - 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, - 31.111, 32.322 + 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, + 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, + 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, + 31.111, 32.322 }; double ret = condition_reduction (a, 16.7); - if (ret != (double)10.6) - abort (); + if (ret != 10.6) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C index 4937e7f10e5..3f30a527cae 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C 
@@ -1,15 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef signed char v32qi __attribute__((vector_size(32))); +typedef signed char vnx16qi __attribute__((vector_size(32))); -v32qi -foo (v32qi x, v32qi y) +vnx16qi +foo (vnx16qi x, vnx16qi y) { - return (v32qi) { -1, 0, 0, -1, -1, -1, 0, 0, - -1, -1, -1, -1, 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, -1, -1, - 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y; + return (vnx16qi) { -1, 0, 0, -1, -1, -1, 0, 0, + -1, -1, -1, -1, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y; } /* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C index 3de4a8ccd00..ec8a0ab9d69 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C @@ -1,13 +1,13 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef short v16hi __attribute__((vector_size(32))); +typedef short vnx8hi __attribute__((vector_size(32))); -v16hi -foo (v16hi x, v16hi y) +vnx8hi +foo (vnx8hi x, vnx8hi y) { - return (v16hi) { -1, 0, 0, -1, -1, -1, 0, 0, - -1, -1, -1, -1, 0, 0, 0, 0 } ? x : y; + return (vnx8hi) { -1, 0, 0, -1, -1, -1, 0, 0, + -1, -1, -1, -1, 0, 0, 0, 0 } ? 
x : y; } /* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C index 8185f7baa76..ab1429d4e40 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C @@ -1,12 +1,12 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef int v8si __attribute__((vector_size(32))); +typedef int vnx4si __attribute__((vector_size(32))); -v8si -foo (v8si x, v8si y) +vnx4si +foo (vnx4si x, vnx4si y) { - return (v8si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y; + return (vnx4si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y; } /* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C index b15da8a59e2..3ad39b9df7d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C @@ -1,12 +1,12 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long long v4di __attribute__((vector_size(32))); +typedef long long vnx2di __attribute__((vector_size(32))); -v4di -foo (v4di x, v4di y) +vnx2di +foo (vnx2di x, vnx2di y) { - return (v4di) { -1, 0, 0, -1 } ? x : y; + return (vnx2di) { -1, 0, 0, -1 } ? 
x : y; } /* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c index ea977207226..8df86eb6b1b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define MASK_2(X) X, X #define MASK_4(X) MASK_2 (X), MASK_2 (X) @@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_16(X) MASK_8 (X), MASK_8 (X) #define MASK_32(X) MASK_16 (X), MASK_16 (X) -#define INDEX_4 v4di -#define INDEX_8 v8si -#define INDEX_16 v16hi -#define INDEX_32 v32qi +#define INDEX_4 vnx2di +#define INDEX_8 vnx4si +#define INDEX_16 vnx8hi +#define INDEX_32 vnx16qi #define DUP_LANE(TYPE, NUNITS, INDEX) \ TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \ @@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v4di, 4, 0) \ - T (v4di, 4, 2) \ - T (v4di, 4, 3) \ - T (v8si, 8, 0) \ - T (v8si, 8, 5) \ - T (v8si, 8, 7) \ - T (v16hi, 16, 0) \ - T (v16hi, 16, 6) \ - T (v16hi, 16, 15) \ - T (v32qi, 32, 0) \ - T 
(v32qi, 32, 19) \ - T (v32qi, 32, 31) \ - T (v4df, 4, 0) \ - T (v4df, 4, 2) \ - T (v4df, 4, 3) \ - T (v8sf, 8, 0) \ - T (v8sf, 8, 5) \ - T (v8sf, 8, 7) \ - T (v16hf, 16, 0) \ - T (v16hf, 16, 6) \ - T (v16hf, 16, 15) \ + T (vnx2di, 4, 0) \ + T (vnx2di, 4, 2) \ + T (vnx2di, 4, 3) \ + T (vnx4si, 8, 0) \ + T (vnx4si, 8, 5) \ + T (vnx4si, 8, 7) \ + T (vnx8hi, 16, 0) \ + T (vnx8hi, 16, 6) \ + T (vnx8hi, 16, 15) \ + T (vnx16qi, 32, 0) \ + T (vnx16qi, 32, 19) \ + T (vnx16qi, 32, 31) \ + T (vnx2df, 4, 0) \ + T (vnx2df, 4, 2) \ + T (vnx2df, 4, 3) \ + T (vnx4sf, 8, 0) \ + T (vnx4sf, 8, 5) \ + T (vnx4sf, 8, 7) \ + T (vnx8hf, 16, 0) \ + T (vnx8hf, 16, 6) \ + T (vnx8hf, 16, 15) \ TEST_ALL (DUP_LANE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c index 1ec51aa2eaf..05bd6dc8f65 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define MASK_2(X) X, X + 1 #define MASK_4(X) MASK_2 (X), MASK_2 (X + 2) @@ -17,10 +17,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_16(X) MASK_8 (X), MASK_8 (X + 8) #define 
MASK_32(X) MASK_16 (X), MASK_16 (X + 16) -#define INDEX_4 v4di -#define INDEX_8 v8si -#define INDEX_16 v16hi -#define INDEX_32 v32qi +#define INDEX_4 vnx2di +#define INDEX_8 vnx4si +#define INDEX_16 vnx8hi +#define INDEX_32 vnx16qi #define DUP_LANE(TYPE, NUNITS, INDEX) \ TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \ @@ -30,27 +30,27 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v4di, 4, 1) \ - T (v4di, 4, 2) \ - T (v4di, 4, 3) \ - T (v8si, 8, 1) \ - T (v8si, 8, 5) \ - T (v8si, 8, 7) \ - T (v16hi, 16, 1) \ - T (v16hi, 16, 6) \ - T (v16hi, 16, 15) \ - T (v32qi, 32, 1) \ - T (v32qi, 32, 19) \ - T (v32qi, 32, 31) \ - T (v4df, 4, 1) \ - T (v4df, 4, 2) \ - T (v4df, 4, 3) \ - T (v8sf, 8, 1) \ - T (v8sf, 8, 5) \ - T (v8sf, 8, 7) \ - T (v16hf, 16, 1) \ - T (v16hf, 16, 6) \ - T (v16hf, 16, 15) \ + T (vnx2di, 4, 1) \ + T (vnx2di, 4, 2) \ + T (vnx2di, 4, 3) \ + T (vnx4si, 8, 1) \ + T (vnx4si, 8, 5) \ + T (vnx4si, 8, 7) \ + T (vnx8hi, 16, 1) \ + T (vnx8hi, 16, 6) \ + T (vnx8hi, 16, 15) \ + T (vnx16qi, 32, 1) \ + T (vnx16qi, 32, 19) \ + T (vnx16qi, 32, 31) \ + T (vnx2df, 4, 1) \ + T (vnx2df, 4, 2) \ + T (vnx2df, 4, 3) \ + T (vnx4sf, 8, 1) \ + T (vnx4sf, 8, 5) \ + T (vnx4sf, 8, 7) \ + T (vnx8hf, 16, 1) \ + T (vnx8hf, 16, 6) \ + T (vnx8hf, 16, 15) \ TEST_ALL (DUP_LANE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c index b93574e50f7..047d4c59651 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c @@ -1,16 +1,16 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef int v8si __attribute__((vector_size (32))); +typedef int vnx4si __attribute__((vector_size (32))); void foo (void) { - register v8si x asm ("z0"); - register v8si y asm ("z1"); + register vnx4si x asm ("z0"); + register vnx4si y asm ("z1"); asm volatile ("" : "=w" (y)); - x = __builtin_shuffle (y, y, 
(v8si) { 1, 2, 3, 4, 5, 6, 7, 8 }); + x = __builtin_shuffle (y, y, (vnx4si) { 1, 2, 3, 4, 5, 6, 7, 8 }); asm volatile ("" :: "w" (x)); } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c index 1ba277ffa6d..f9cd8d2998e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define EXTRACT(ELT_TYPE, TYPE, INDEX) \ ELT_TYPE permute_##TYPE##_##INDEX (void) \ @@ -20,39 +20,39 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (int64_t, v4di, 0) \ - T (int64_t, v4di, 1) \ - T (int64_t, v4di, 2) \ - T (int64_t, v4di, 3) \ - T (int32_t, v8si, 0) \ - T (int32_t, v8si, 1) \ - T (int32_t, v8si, 3) \ - T (int32_t, v8si, 4) \ - T (int32_t, v8si, 7) \ - T (int16_t, v16hi, 0) \ - T (int16_t, v16hi, 1) \ - T (int16_t, v16hi, 7) \ - T (int16_t, v16hi, 8) \ - T (int16_t, v16hi, 15) \ - T (int8_t, v32qi, 0) \ - T (int8_t, v32qi, 1) \ - T (int8_t, v32qi, 15) \ - T (int8_t, v32qi, 16) \ - T (int8_t, v32qi, 31) \ - T (double, v4df, 0) \ - T (double, v4df, 1) \ - T (double, v4df, 2) \ - T 
(double, v4df, 3) \ - T (float, v8sf, 0) \ - T (float, v8sf, 1) \ - T (float, v8sf, 3) \ - T (float, v8sf, 4) \ - T (float, v8sf, 7) \ - T (_Float16, v16hf, 0) \ - T (_Float16, v16hf, 1) \ - T (_Float16, v16hf, 7) \ - T (_Float16, v16hf, 8) \ - T (_Float16, v16hf, 15) + T (int64_t, vnx2di, 0) \ + T (int64_t, vnx2di, 1) \ + T (int64_t, vnx2di, 2) \ + T (int64_t, vnx2di, 3) \ + T (int32_t, vnx4si, 0) \ + T (int32_t, vnx4si, 1) \ + T (int32_t, vnx4si, 3) \ + T (int32_t, vnx4si, 4) \ + T (int32_t, vnx4si, 7) \ + T (int16_t, vnx8hi, 0) \ + T (int16_t, vnx8hi, 1) \ + T (int16_t, vnx8hi, 7) \ + T (int16_t, vnx8hi, 8) \ + T (int16_t, vnx8hi, 15) \ + T (int8_t, vnx16qi, 0) \ + T (int8_t, vnx16qi, 1) \ + T (int8_t, vnx16qi, 15) \ + T (int8_t, vnx16qi, 16) \ + T (int8_t, vnx16qi, 31) \ + T (double, vnx2df, 0) \ + T (double, vnx2df, 1) \ + T (double, vnx2df, 2) \ + T (double, vnx2df, 3) \ + T (float, vnx4sf, 0) \ + T (float, vnx4sf, 1) \ + T (float, vnx4sf, 3) \ + T (float, vnx4sf, 4) \ + T (float, vnx4sf, 7) \ + T (_Float16, vnx8hf, 0) \ + T (_Float16, vnx8hf, 1) \ + T (_Float16, vnx8hf, 7) \ + T (_Float16, vnx8hf, 8) \ + T (_Float16, vnx8hf, 15) TEST_ALL (EXTRACT) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c index b163f28ef28..717546997b3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v8di __attribute__((vector_size (64))); -typedef int32_t v16si __attribute__((vector_size (64))); -typedef int16_t v32hi __attribute__((vector_size (64))); -typedef int8_t v64qi __attribute__((vector_size (64))); -typedef double v8df __attribute__((vector_size (64))); -typedef float v16sf __attribute__((vector_size (64))); -typedef _Float16 v32hf __attribute__((vector_size (64))); +typedef int64_t vnx4di __attribute__((vector_size (64))); +typedef int32_t vnx8si __attribute__((vector_size (64))); +typedef 
int16_t vnx16hi __attribute__((vector_size (64))); +typedef int8_t vnx32qi __attribute__((vector_size (64))); +typedef double vnx4df __attribute__((vector_size (64))); +typedef float vnx8sf __attribute__((vector_size (64))); +typedef _Float16 vnx16hf __attribute__((vector_size (64))); #define EXTRACT(ELT_TYPE, TYPE, INDEX) \ ELT_TYPE permute_##TYPE##_##INDEX (void) \ @@ -20,39 +20,39 @@ typedef _Float16 v32hf __attribute__((vector_size (64))); } #define TEST_ALL(T) \ - T (int64_t, v8di, 0) \ - T (int64_t, v8di, 1) \ - T (int64_t, v8di, 2) \ - T (int64_t, v8di, 7) \ - T (int32_t, v16si, 0) \ - T (int32_t, v16si, 1) \ - T (int32_t, v16si, 3) \ - T (int32_t, v16si, 4) \ - T (int32_t, v16si, 15) \ - T (int16_t, v32hi, 0) \ - T (int16_t, v32hi, 1) \ - T (int16_t, v32hi, 7) \ - T (int16_t, v32hi, 8) \ - T (int16_t, v32hi, 31) \ - T (int8_t, v64qi, 0) \ - T (int8_t, v64qi, 1) \ - T (int8_t, v64qi, 15) \ - T (int8_t, v64qi, 16) \ - T (int8_t, v64qi, 63) \ - T (double, v8df, 0) \ - T (double, v8df, 1) \ - T (double, v8df, 2) \ - T (double, v8df, 7) \ - T (float, v16sf, 0) \ - T (float, v16sf, 1) \ - T (float, v16sf, 3) \ - T (float, v16sf, 4) \ - T (float, v16sf, 15) \ - T (_Float16, v32hf, 0) \ - T (_Float16, v32hf, 1) \ - T (_Float16, v32hf, 7) \ - T (_Float16, v32hf, 8) \ - T (_Float16, v32hf, 31) + T (int64_t, vnx4di, 0) \ + T (int64_t, vnx4di, 1) \ + T (int64_t, vnx4di, 2) \ + T (int64_t, vnx4di, 7) \ + T (int32_t, vnx8si, 0) \ + T (int32_t, vnx8si, 1) \ + T (int32_t, vnx8si, 3) \ + T (int32_t, vnx8si, 4) \ + T (int32_t, vnx8si, 15) \ + T (int16_t, vnx16hi, 0) \ + T (int16_t, vnx16hi, 1) \ + T (int16_t, vnx16hi, 7) \ + T (int16_t, vnx16hi, 8) \ + T (int16_t, vnx16hi, 31) \ + T (int8_t, vnx32qi, 0) \ + T (int8_t, vnx32qi, 1) \ + T (int8_t, vnx32qi, 15) \ + T (int8_t, vnx32qi, 16) \ + T (int8_t, vnx32qi, 63) \ + T (double, vnx4df, 0) \ + T (double, vnx4df, 1) \ + T (double, vnx4df, 2) \ + T (double, vnx4df, 7) \ + T (float, vnx8sf, 0) \ + T (float, vnx8sf, 1) \ + T 
(float, vnx8sf, 3) \ + T (float, vnx8sf, 4) \ + T (float, vnx8sf, 15) \ + T (_Float16, vnx16hf, 0) \ + T (_Float16, vnx16hf, 1) \ + T (_Float16, vnx16hf, 7) \ + T (_Float16, vnx16hf, 8) \ + T (_Float16, vnx16hf, 31) TEST_ALL (EXTRACT) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c index 87ac2351768..19a22cdd7b7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v16di __attribute__((vector_size (128))); -typedef int32_t v32si __attribute__((vector_size (128))); -typedef int16_t v64hi __attribute__((vector_size (128))); -typedef int8_t v128qi __attribute__((vector_size (128))); -typedef double v16df __attribute__((vector_size (128))); -typedef float v32sf __attribute__((vector_size (128))); -typedef _Float16 v64hf __attribute__((vector_size (128))); +typedef int64_t vnx8di __attribute__((vector_size (128))); +typedef int32_t vnx16si __attribute__((vector_size (128))); +typedef int16_t vnx32hi __attribute__((vector_size (128))); +typedef int8_t vnx64qi __attribute__((vector_size (128))); +typedef double vnx8df __attribute__((vector_size (128))); +typedef float vnx16sf __attribute__((vector_size (128))); +typedef _Float16 vnx32hf __attribute__((vector_size (128))); #define EXTRACT(ELT_TYPE, TYPE, INDEX) \ ELT_TYPE permute_##TYPE##_##INDEX (void) \ @@ -20,60 +20,60 @@ typedef _Float16 v64hf __attribute__((vector_size (128))); } #define TEST_ALL(T) \ - T (int64_t, v16di, 0) \ - T (int64_t, v16di, 1) \ - T (int64_t, v16di, 2) \ - T (int64_t, v16di, 7) \ - T (int64_t, v16di, 8) \ - T (int64_t, v16di, 9) \ - T (int64_t, v16di, 15) \ - T (int32_t, v32si, 0) \ - T (int32_t, v32si, 1) \ - T (int32_t, v32si, 3) \ - T (int32_t, v32si, 4) \ - T (int32_t, v32si, 15) \ - T (int32_t, v32si, 16) \ - T (int32_t, v32si, 21) \ - T (int32_t, v32si, 31) \ - T (int16_t, v64hi, 0) \ - T (int16_t, v64hi, 1) 
\ - T (int16_t, v64hi, 7) \ - T (int16_t, v64hi, 8) \ - T (int16_t, v64hi, 31) \ - T (int16_t, v64hi, 32) \ - T (int16_t, v64hi, 47) \ - T (int16_t, v64hi, 63) \ - T (int8_t, v128qi, 0) \ - T (int8_t, v128qi, 1) \ - T (int8_t, v128qi, 15) \ - T (int8_t, v128qi, 16) \ - T (int8_t, v128qi, 63) \ - T (int8_t, v128qi, 64) \ - T (int8_t, v128qi, 100) \ - T (int8_t, v128qi, 127) \ - T (double, v16df, 0) \ - T (double, v16df, 1) \ - T (double, v16df, 2) \ - T (double, v16df, 7) \ - T (double, v16df, 8) \ - T (double, v16df, 9) \ - T (double, v16df, 15) \ - T (float, v32sf, 0) \ - T (float, v32sf, 1) \ - T (float, v32sf, 3) \ - T (float, v32sf, 4) \ - T (float, v32sf, 15) \ - T (float, v32sf, 16) \ - T (float, v32sf, 21) \ - T (float, v32sf, 31) \ - T (_Float16, v64hf, 0) \ - T (_Float16, v64hf, 1) \ - T (_Float16, v64hf, 7) \ - T (_Float16, v64hf, 8) \ - T (_Float16, v64hf, 31) \ - T (_Float16, v64hf, 32) \ - T (_Float16, v64hf, 47) \ - T (_Float16, v64hf, 63) + T (int64_t, vnx8di, 0) \ + T (int64_t, vnx8di, 1) \ + T (int64_t, vnx8di, 2) \ + T (int64_t, vnx8di, 7) \ + T (int64_t, vnx8di, 8) \ + T (int64_t, vnx8di, 9) \ + T (int64_t, vnx8di, 15) \ + T (int32_t, vnx16si, 0) \ + T (int32_t, vnx16si, 1) \ + T (int32_t, vnx16si, 3) \ + T (int32_t, vnx16si, 4) \ + T (int32_t, vnx16si, 15) \ + T (int32_t, vnx16si, 16) \ + T (int32_t, vnx16si, 21) \ + T (int32_t, vnx16si, 31) \ + T (int16_t, vnx32hi, 0) \ + T (int16_t, vnx32hi, 1) \ + T (int16_t, vnx32hi, 7) \ + T (int16_t, vnx32hi, 8) \ + T (int16_t, vnx32hi, 31) \ + T (int16_t, vnx32hi, 32) \ + T (int16_t, vnx32hi, 47) \ + T (int16_t, vnx32hi, 63) \ + T (int8_t, vnx64qi, 0) \ + T (int8_t, vnx64qi, 1) \ + T (int8_t, vnx64qi, 15) \ + T (int8_t, vnx64qi, 16) \ + T (int8_t, vnx64qi, 63) \ + T (int8_t, vnx64qi, 64) \ + T (int8_t, vnx64qi, 100) \ + T (int8_t, vnx64qi, 127) \ + T (double, vnx8df, 0) \ + T (double, vnx8df, 1) \ + T (double, vnx8df, 2) \ + T (double, vnx8df, 7) \ + T (double, vnx8df, 8) \ + T (double, vnx8df, 9) \ + T 
(double, vnx8df, 15) \ + T (float, vnx16sf, 0) \ + T (float, vnx16sf, 1) \ + T (float, vnx16sf, 3) \ + T (float, vnx16sf, 4) \ + T (float, vnx16sf, 15) \ + T (float, vnx16sf, 16) \ + T (float, vnx16sf, 21) \ + T (float, vnx16sf, 31) \ + T (_Float16, vnx32hf, 0) \ + T (_Float16, vnx32hf, 1) \ + T (_Float16, vnx32hf, 7) \ + T (_Float16, vnx32hf, 8) \ + T (_Float16, vnx32hf, 31) \ + T (_Float16, vnx32hf, 32) \ + T (_Float16, vnx32hf, 47) \ + T (_Float16, vnx32hf, 63) TEST_ALL (EXTRACT) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c index b193726ea0a..5934b2dfb12 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vdiv_##TYPE (TYPE *x, TYPE y) \ @@ -29,9 +29,9 @@ void vdivr_##TYPE (TYPE *x, TYPE y) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c index 2b1dbb087bc..7b1575f9ee4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ 
-typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c index d5e4df266bf..381af4c8517 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c index 
c3f2c8a5823..744d0bb7bcc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c index 30e1895c8d5..e1251bd9cf6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times 
{\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c index 84a95187314..238bd852117 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c index dcc4811f1d8..f258a7454da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ 
void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c index 7a89399f4be..4d859d4b0a1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf __attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c index 6c95b0abc8e..2510a6f2831 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c @@ -1,9 +1,9 @@ /* { dg-do assemble } */ /* { dg-options " -O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef _Float16 v16hf __attribute__((vector_size(32))); -typedef float v8sf 
__attribute__((vector_size(32))); -typedef double v4df __attribute__((vector_size(32))); +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef float vnx4sf __attribute__((vector_size(32))); +typedef double vnx2df __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -20,9 +20,9 @@ void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v16hf) -DO_OP (v8sf) -DO_OP (v4df) +DO_OP (vnx8hf) +DO_OP (vnx4sf) +DO_OP (vnx2df) /* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c index 096a969d756..6ed5c06bd51 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c @@ -1,72 +1,32 @@ /* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, unsigned long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, unsigned int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, unsigned short * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, unsigned char * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load64s(signed long * restrict dst, signed long * restrict src, unsigned long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; 
-} - -void gather_load32s(signed int * restrict dst, signed int * restrict src, unsigned int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load16s(signed short * restrict dst, signed short * restrict src, unsigned short * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load8s(signed char * restrict dst, signed char * restrict src, unsigned char * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load_double(double * restrict dst, double * restrict src, unsigned long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load_float(float * restrict dst, float * restrict src, unsigned int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ -/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 3 } } */ -/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */ -/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */ +/* { 
dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +/* Invoked 18 times for each data size. */ +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[i] += src[indices[i]]; \ + } + +#define TEST_ALL(T) \ + T (int32_t, 32) \ + T (uint32_t, 32) \ + T (float, 32) \ + T (int64_t, 64) \ + T (uint64_t, 64) \ + T (double, 64) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c deleted file mode 100644 index b31b4508114..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c +++ /dev/null @@ -1,72 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, signed long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, signed int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, signed short * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, signed char * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void 
gather_load64s(signed long * restrict dst, signed long * restrict src, signed long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load32s(signed int * restrict dst, signed int * restrict src, signed int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load16s(signed short * restrict dst, signed short * restrict src, signed short * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load8s(signed char * restrict dst, signed char * restrict src, signed char * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load_double(double * restrict dst, double * restrict src, signed long * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -void gather_load_float(float * restrict dst, float * restrict src, signed int * restrict indices, int count) -{ - for (int i=0; i<count; i++) - dst[i] = src[indices[i]]; -} - -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ -/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 3 } } */ -/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, 
\\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */ -/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c deleted file mode 100644 index d8a85396eb4..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c +++ /dev/null @@ -1,14 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -void -f (double *restrict a, double *restrict b, short *c, int *d, int n) -{ - for (int i = 0; i < n; i++) - a[i] = b[c[i] + d[i]]; -} - -/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+.h,} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s,} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d,} 4 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+.d,} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c index 9b62b12904e..4e348db3bf1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c @@ -1,72 +1,10 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -void gather_loadu64_s16(unsigned long * restrict dst, unsigned long * restrict src, - short int * restrict indices, short n) -{ - for (short i=0; i<n; i++) - dst[i] = src[indices[i]]; -} +#define INDEX32 uint32_t +#define INDEX64 uint64_t -void gather_loadu64_u16(unsigned long * restrict dst, unsigned long * restrict src, - unsigned short int * restrict indices, short n) -{ - for (short i=0; i<n; i++) - dst[i] = src[indices[i]]; -} +#include "sve_gather_load_1.c" -void gather_loadd_s16(double * restrict dst, double * restrict src, - short * restrict indices, short n) -{ - for (short i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -void gather_loadd_u16(double * 
restrict dst, double * restrict src, - unsigned short * restrict indices, short n) -{ - for (short i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -void gather_loadu64_s32(unsigned long * restrict dst, unsigned long * restrict src, - int * restrict indices, int n) -{ - for (int i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -void gather_loadu64_u32(unsigned long * restrict dst, unsigned long * restrict src, - unsigned int * restrict indices, int n) -{ - for (int i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -void gather_loadd_s32(double * restrict dst, double * restrict src, - int * restrict indices, int n) -{ - for (int i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -void gather_loadd_u32(double * restrict dst, double * restrict src, - unsigned int * restrict indices, int n) -{ - for (int i=0; i<n; i++) - dst[i] = src[indices[i]]; -} - -/* At present we only use unpacks for the 32/64 combinations. */ -/* { dg-final { scan-assembler-times {\tpunpklo\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */ -/* { dg-final { scan-assembler-times {\tpunpkhi\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */ - -/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ -/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ -/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ -/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ -/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { 
scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c index 0a8f802ce56..a113a0faeb9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c @@ -1,45 +1,32 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ - DATA_TYPE __attribute__ ((noinline)) \ - NAME (char *data, INDEX_TYPE *indices, int n) \ - { \ - DATA_TYPE sum = 0; \ - for (int i = 0; i < n; ++i) \ - sum += *(DATA_TYPE *) (data + indices[i]); \ - return sum; \ - } +#include <stdint.h> -#define TEST32(NAME, DATA_TYPE) \ - TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \ - TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \ - TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \ - TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \ - TEST_LOOP (NAME ## _s16, DATA_TYPE, signed short) \ - TEST_LOOP (NAME ## _s32, DATA_TYPE, signed int) +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif -#define TEST64(NAME, DATA_TYPE) \ - TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \ - TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \ - TEST_LOOP (NAME ## _s16, DATA_TYPE, short) \ - TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \ - TEST_LOOP (NAME ## _s32, DATA_TYPE, int) \ - TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \ - TEST_LOOP (NAME ## _s64, DATA_TYPE, long) \ - TEST_LOOP (NAME ## _u64, DATA_TYPE, unsigned long) +/* Invoked 18 times for each data size. 
*/ +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \ + } -TEST32 (f_s32, int) -TEST32 (f_u32, unsigned int) -TEST32 (f_f32, float) +#define TEST_ALL(T) \ + T (int32_t, 32) \ + T (uint32_t, 32) \ + T (float, 32) \ + T (int64_t, 64) \ + T (uint64_t, 64) \ + T (double, 64) -TEST64 (f_s64, long) -TEST64 (f_u64, unsigned long) -TEST64 (f_f64, double) +TEST_ALL (TEST_LOOP) -/* (4 + 2 + 1) * 3 */ -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]} 21 } } */ -/* (4 + 2 + 1) * 3 */ -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 21 } } */ -/* (8 + 8 + 4 + 4 + 2 + 2 + 1 + 1) * 3 */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 90 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c deleted file mode 100644 index baa90d5d5fc..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c +++ /dev/null @@ -1,41 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ - -#include "sve_gather_load_3.c" - -extern void abort (void); - -#define N 57 - -#undef TEST_LOOP -#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ - { \ - INDEX_TYPE indices[N]; \ - DATA_TYPE data[N * 2]; \ - for (int i = 0; i < N * 2; ++i) \ - data[i] = (i / 2) * 4 + i % 2; \ - DATA_TYPE sum = 0; \ - for (int i = 0; i < N; ++i) \ - { \ - INDEX_TYPE j = (i * 3 / 
2) * sizeof (DATA_TYPE); \ - j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \ - sum += data[j / sizeof (DATA_TYPE)]; \ - indices[i] = j; \ - } \ - DATA_TYPE res = NAME ((char *) data, indices, N); \ - if (res != sum) \ - abort (); \ - } - -int __attribute__ ((optimize (1))) -main () -{ - TEST32 (f_s32, int) - TEST32 (f_u32, unsigned int) - TEST32 (f_f32, float) - - TEST64 (f_s64, long) - TEST64 (f_u64, unsigned long) - TEST64 (f_f64, double) - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c index 4d0da987d30..5382e523689 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c @@ -1,18 +1,10 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TEST_LOOP(NAME, TYPE) \ - TYPE __attribute__ ((noinline)) \ - NAME (TYPE **indices, int n) \ - { \ - TYPE sum = 0; \ - for (int i = 0; i < n; ++i) \ - sum += *indices[i]; \ - return sum; \ - } +#define INDEX32 uint32_t +#define INDEX64 uint64_t -TEST_LOOP (f_s64, long) -TEST_LOOP (f_u64, unsigned long) -TEST_LOOP (f_f64, double) +#include "sve_gather_load_3.c" -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c deleted file mode 100644 index 00d3dea6acd..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c +++ /dev/null @@ -1,35 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize 
-march=armv8-a+sve -ffast-math" } */ - -#include "sve_gather_load_4.c" - -extern void abort (void); - -#define N 57 - -#undef TEST_LOOP -#define TEST_LOOP(NAME, TYPE) \ - { \ - TYPE *ptrs[N]; \ - TYPE data[N * 2]; \ - for (int i = 0; i < N * 2; ++i) \ - data[i] = (i / 2) * 4 + i % 2; \ - TYPE sum = 0; \ - for (int i = 0; i < N; ++i) \ - { \ - ptrs[i] = &data[i * 3 / 2]; \ - sum += *ptrs[i]; \ - } \ - TYPE res = NAME (ptrs, N); \ - if (res != sum) \ - abort (); \ - } - -int __attribute__ ((optimize (1))) -main () -{ - TEST_LOOP (f_s64, long) - TEST_LOOP (f_u64, unsigned long) - TEST_LOOP (f_f64, double) - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c index 0aaf9553a11..8e4f689243b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c @@ -1,113 +1,23 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE)\ -void gather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - dst[i] = src[i * STRIDE];\ -} - -#define GATHER_LOAD2(OBJTYPE,STRIDETYPE)\ -void gather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - dst[i] = src[i * stride];\ -} - -#define GATHER_LOAD3(OBJTYPE,STRIDETYPE)\ -void gather_load3s5##OBJTYPE##STRIDETYPE\ - (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\ - OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\ - STRIDETYPE count)\ -{\ - const STRIDETYPE STRIDE = 5;\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - d1[i] = src[0 + (i * STRIDE)];\ - d2[i] = src[1 + (i * STRIDE)];\ - d3[i] = src[2 + (i * STRIDE)];\ - d4[i] = src[3 + (i * STRIDE)];\ - 
d5[i] = src[4 + (i * STRIDE)];\ - }\ -} - -#define GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE)\ -void gather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - *dst = *src;\ - dst += 1;\ - src += STRIDE;\ - }\ -} - -#define GATHER_LOAD5(OBJTYPE,STRIDETYPE)\ -void gather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - *dst = *src;\ - dst += 1;\ - src += stride;\ - }\ -} - -GATHER_LOAD1 (double, long, 5) -GATHER_LOAD1 (double, long, 8) -GATHER_LOAD1 (double, long, 21) -GATHER_LOAD1 (double, long, 1009) - -GATHER_LOAD1 (float, int, 5) -GATHER_LOAD1 (float, int, 8) -GATHER_LOAD1 (float, int, 21) -GATHER_LOAD1 (float, int, 1009) - -GATHER_LOAD2 (double, long) -GATHER_LOAD2 (float, int) - -GATHER_LOAD3 (double, long) -GATHER_LOAD3 (float, int) - -GATHER_LOAD4 (double, long, 5) - -/* NOTE: We can't vectorize GATHER_LOAD4 (float, int, 5) because we can't prove - that the offsets used for the gather load won't overflow. */ - -GATHER_LOAD5 (double, long) -GATHER_LOAD5 (float, int) - -/* Widened forms. */ -GATHER_LOAD1 (double, int, 5) -GATHER_LOAD1 (double, int, 8) -GATHER_LOAD1 (double, short, 5) -GATHER_LOAD1 (double, short, 8) - -GATHER_LOAD1 (float, short, 5) -GATHER_LOAD1 (float, short, 8) - -GATHER_LOAD2 (double, int) -GATHER_LOAD2 (float, short) - -GATHER_LOAD4 (double, int, 5) -GATHER_LOAD4 (float, short, 5) - -GATHER_LOAD5 (double, int) - -/* TODO: We generate abysmal code for this even though we don't use gathers. 
*/ -/*GATHER_LOAD5 (float, short)*/ - -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ +#include <stdint.h> + +/* Invoked 18 times for each data size. */ +#define TEST_LOOP(DATA_TYPE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict *src, \ + int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[i] += *src[i]; \ + } + +#define TEST_ALL(T) \ + T (int64_t) \ + T (uint64_t) \ + T (double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c deleted file mode 100644 index 7608f9b569b..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c +++ /dev/null @@ -1,161 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <unistd.h> - -extern void abort (void); -extern void *memset(void *, int, size_t); - -#include "sve_gather_load_5.c" - -#define NUM_DST_ELEMS 13 -#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE) - -#define TEST_GATHER_LOAD_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - src[i * STRIDE] = 
i;\ - FUN##OBJTYPE##STRIDETYPE##STRIDE \ - (dst, src, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - if (dst[i] != i)\ - abort ();\ -} - -#define TEST_GATHER_LOAD_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - src[i * STRIDE] = i;\ - FUN##OBJTYPE##STRIDETYPE \ - (dst, src, STRIDE, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - if (dst[i] != i)\ - abort ();\ -} - -#define TEST_GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_GATHER_LOAD_COMMON1 (gather_load1, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_GATHER_LOAD2(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_GATHER_LOAD_COMMON2 (gather_load2, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_GATHER_LOAD3(OBJTYPE,STRIDETYPE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\ - memset (real_dst1, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst2, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst3, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst4, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst5, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ - OBJTYPE *src = 
&real_src[1];\ - OBJTYPE *dst1 = &real_dst1[1];\ - OBJTYPE *dst2 = &real_dst2[1];\ - OBJTYPE *dst3 = &real_dst3[1];\ - OBJTYPE *dst4 = &real_dst4[1];\ - OBJTYPE *dst5 = &real_dst5[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\ - src[i] = i;\ - gather_load3s5##OBJTYPE##STRIDETYPE \ - (dst1, dst2, dst3, dst4, dst5, src, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - {\ - STRIDETYPE base = i * 5;\ - if (dst1[i] != base)\ - abort ();\ - if (dst2[i] != (base + 1))\ - abort ();\ - if (dst3[i] != (base + 2))\ - abort ();\ - if (dst4[i] != (base + 3))\ - abort ();\ - if (dst5[i] != (base + 4))\ - abort ();\ - }\ -} - -#define TEST_GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_GATHER_LOAD_COMMON1 (gather_load4, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_GATHER_LOAD5(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_GATHER_LOAD_COMMON2 (gather_load5, OBJTYPE, STRIDETYPE, STRIDE) - -int __attribute__ ((optimize (1))) -main () -{ - TEST_GATHER_LOAD1 (double, long, 5); - TEST_GATHER_LOAD1 (double, long, 8); - TEST_GATHER_LOAD1 (double, long, 21); - - TEST_GATHER_LOAD1 (float, int, 5); - TEST_GATHER_LOAD1 (float, int, 8); - TEST_GATHER_LOAD1 (float, int, 21); - - TEST_GATHER_LOAD2 (double, long, 5); - TEST_GATHER_LOAD2 (double, long, 8); - TEST_GATHER_LOAD2 (double, long, 21); - - TEST_GATHER_LOAD2 (float, int, 5); - TEST_GATHER_LOAD2 (float, int, 8); - TEST_GATHER_LOAD2 (float, int, 21); - - TEST_GATHER_LOAD3 (double, long); - TEST_GATHER_LOAD3 (float, int); - - TEST_GATHER_LOAD4 (double, long, 5); - - TEST_GATHER_LOAD5 (double, long, 5); - TEST_GATHER_LOAD5 (float, int, 5); - - /* Widened forms. 
*/ - TEST_GATHER_LOAD1 (double, int, 5) - TEST_GATHER_LOAD1 (double, int, 8) - TEST_GATHER_LOAD1 (double, short, 5) - TEST_GATHER_LOAD1 (double, short, 8) - - TEST_GATHER_LOAD1 (float, short, 5) - TEST_GATHER_LOAD1 (float, short, 8) - - TEST_GATHER_LOAD2 (double, int, 5); - TEST_GATHER_LOAD2 (double, int, 8); - TEST_GATHER_LOAD2 (double, int, 21); - - TEST_GATHER_LOAD2 (float, short, 5); - TEST_GATHER_LOAD2 (float, short, 8); - TEST_GATHER_LOAD2 (float, short, 21); - - TEST_GATHER_LOAD4 (double, int, 5); - TEST_GATHER_LOAD4 (float, short, 5); - - TEST_GATHER_LOAD5 (double, int, 5); - - return 0; -} - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c index 68b0b4d59b6..745e00f1e50 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c @@ -1,14 +1,36 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ - -void -foo (double *__restrict y, double *__restrict x1, - double *__restrict x2, int m) -{ - for (int i = 0; i < 256; ++i) - y[i * m] = x1[i * m] + x2[i * m]; -} - -/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+} 1 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */ -/* { dg-final { scan-assembler-not {\torr\tz[0-9]+} } } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX16 int16_t +#define INDEX32 int32_t +#endif + +/* Invoked 18 times for each data size. 
*/ +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, INDEX##BITS mask, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[i] = src[(INDEX##BITS) (indices[i] | mask)]; \ + } + +#define TEST_ALL(T) \ + T (int32_t, 16) \ + T (uint32_t, 16) \ + T (float, 16) \ + T (int64_t, 32) \ + T (uint64_t, 32) \ + T (double, 32) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c index 788aeb08df2..8f2dfb75149 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c @@ -1,15 +1,15 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -void -foo (double *x, int m) -{ - for (int i = 0; i < 256; ++i) - x[i * m] += x[i * m]; -} +#define INDEX16 uint16_t +#define INDEX32 uint32_t -/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */ -/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ +#include 
"sve_gather_load_6.c" + +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* Either extension type is OK here. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c deleted file mode 100644 index 0c0cf73be55..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ - -void -f (int *__restrict a, - int *__restrict b, - int *__restrict c, - int count) -{ - for (int i = 0; i < count; ++i) - a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 2] - + c[i * 5] + c[i * 5 + 3]); -} - -/* There must be a final scalar iteration because b[(count - 1) * 4 + 3] - is not accessed by the original code. 
*/ -/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */ -/* { dg-final { scan-assembler {\tldr\t} } } */ -/* { dg-final { scan-assembler {\tstr\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c deleted file mode 100644 index dad798c8106..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c +++ /dev/null @@ -1,18 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ - -void -f (int *__restrict a, - int *__restrict b, - int *__restrict c, - int count) -{ - for (int i = 0; i < count; ++i) - a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 3] - + c[i * 5] + c[i * 5 + 3]); -} - -/* There's no need for a scalar tail here. */ -/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */ -/* { dg-final { scan-assembler-not {\tldr\t} } } */ -/* { dg-final { scan-assembler-not {\tstr\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c new file mode 100644 index 00000000000..9c4bb37f04e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define SIZE (15 * 8 + 3) + +#define DEF_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \ +void __attribute__ ((noinline, noclone)) \ +set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *restrict out, \ + SIGNED TYPE *restrict in) \ +{ \ + SIGNED ITERTYPE i; \ + for (i = 0; i < SIZE; i++) \ + { \ + out[i] = in[i]; \ + } \ +} \ +void __attribute__ ((noinline, noclone)) \ +set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *restrict out, \ + SIGNED TYPE *restrict in, \ + SIGNED ITERTYPE n) \ +{ \ + SIGNED ITERTYPE i; \ + for (i = 0; i < n; i++) \ + { \ + out[i] = in[i]; \ + } \ +} + +#define TEST_TYPE(T, SIGNED, TYPE) \ + T (SIGNED, TYPE, char) \ + T (SIGNED, TYPE, 
short) \ + T (SIGNED, TYPE, int) \ + T (SIGNED, TYPE, long) + +#define TEST_ALL(T) \ + TEST_TYPE (T, signed, long) \ + TEST_TYPE (T, unsigned, long) \ + TEST_TYPE (T, signed, int) \ + TEST_TYPE (T, unsigned, int) \ + TEST_TYPE (T, signed, short) \ + TEST_TYPE (T, unsigned, short) \ + TEST_TYPE (T, signed, char) \ + TEST_TYPE (T, unsigned, char) + +TEST_ALL (DEF_INDEX_OFFSET) + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c new file mode 100644 index 00000000000..276d259ac3f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_offset_1_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_index_offset_1.c" + +#define TEST_INDEX_OFFSET(SIGNED, TYPE, ITERTYPE) \ +{ \ + SIGNED TYPE out[SIZE + 1]; \ + SIGNED TYPE in1[SIZE + 1]; \ + SIGNED TYPE in2[SIZE + 1]; 
\ + for (int i = 0; i < SIZE + 1; ++i) \ + { \ + in1[i] = (i * 4) ^ i; \ + in2[i] = (i * 2) ^ i; \ + asm volatile ("" ::: "memory"); \ + } \ + out[SIZE] = 42; \ + set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \ + if (0 != __builtin_memcmp (out, in1, SIZE * sizeof (TYPE))) \ + __builtin_abort (); \ + set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \ + if (0 != __builtin_memcmp (out, in2, SIZE * sizeof (TYPE))) \ + __builtin_abort (); \ + if (out[SIZE] != 42) \ + __builtin_abort (); \ +} + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (TEST_INDEX_OFFSET); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c deleted file mode 100644 index 949449cde9f..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c +++ /dev/null @@ -1,49 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" } */ - -#define SIZE 15*8+3 - -#define INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE) \ -void set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *__restrict out, \ - SIGNED TYPE *__restrict in) \ -{ \ - SIGNED ITERTYPE i; \ - for (i = 0; i < SIZE; i++) \ - { \ - out[i] = in[i]; \ - } \ -} \ -void set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *__restrict out, \ - SIGNED TYPE *__restrict in, \ - SIGNED ITERTYPE n) \ -{\ - SIGNED ITERTYPE i;\ - for (i = 0; i < n; i++)\ - {\ - out[i] = in[i];\ - }\ -} - -#define INDEX_OFFSET_TEST(SIGNED, TYPE)\ - INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \ - INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \ - INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \ - INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long) - -INDEX_OFFSET_TEST (signed, long) -INDEX_OFFSET_TEST (unsigned, long) -INDEX_OFFSET_TEST (signed, int) -INDEX_OFFSET_TEST (unsigned, int) -INDEX_OFFSET_TEST (signed, short) -INDEX_OFFSET_TEST (unsigned, short) -INDEX_OFFSET_TEST (signed, char) -INDEX_OFFSET_TEST 
(unsigned, char) - -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ -/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c deleted file mode 100644 index d6b2646798c..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c +++ /dev/null @@ -1,48 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve" } */ -/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ - -#include "sve_indexoffset_1.c" - -#include <string.h> - -#define CALL_INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE)\ -{\ - SIGNED TYPE out[SIZE + 1];\ - SIGNED TYPE in1[SIZE + 1];\ - SIGNED TYPE in2[SIZE + 1];\ - for (int i = 0; i < SIZE + 1; ++i)\ - {\ - in1[i] = (i * 4) ^ i;\ - in2[i] = (i * 2) ^ i;\ - }\ - out[SIZE] = 42;\ - set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \ - if (0 != memcmp (out, in1, SIZE * sizeof (TYPE)))\ - return 1;\ - set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, 
SIZE); \ - if (0 != memcmp (out, in2, SIZE * sizeof (TYPE)))\ - return 1;\ - if (out[SIZE] != 42)\ - return 1;\ -} - -#define CALL_INDEX_OFFSET_TEST(SIGNED, TYPE)\ - CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \ - CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \ - CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \ - CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long) - -int -main (void) -{ - CALL_INDEX_OFFSET_TEST (signed, long) - CALL_INDEX_OFFSET_TEST (unsigned, long) - CALL_INDEX_OFFSET_TEST (signed, int) - CALL_INDEX_OFFSET_TEST (unsigned, int) - CALL_INDEX_OFFSET_TEST (signed, short) - CALL_INDEX_OFFSET_TEST (unsigned, short) - CALL_INDEX_OFFSET_TEST (signed, char) - CALL_INDEX_OFFSET_TEST (unsigned, char) - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C deleted file mode 100644 index 4c196684364..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C +++ /dev/null @@ -1,56 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ - -#include <stdint.h> - -#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) - -#define DEF_LOAD_BROADCAST(TYPE)\ -void set_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\ -{\ - for (int i = 0; i < NUM_ELEMS (TYPE); i++)\ - a[i] = *b;\ -}\ - -#define DEF_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\ -void set_##TYPE##SUFFIX (TYPE *__restrict__ a)\ -{\ - for (int i = 0; i < NUM_ELEMS (TYPE); i++)\ - a[i] = IMM;\ -}\ - -/* --- VALID --- */ - -DEF_LOAD_BROADCAST (int8_t) -DEF_LOAD_BROADCAST (int16_t) -DEF_LOAD_BROADCAST (int32_t) -DEF_LOAD_BROADCAST (int64_t) - -DEF_LOAD_BROADCAST_IMM (int16_t, 129, imm_129) -DEF_LOAD_BROADCAST_IMM (int32_t, 129, imm_129) -DEF_LOAD_BROADCAST_IMM (int64_t, 129, imm_129) - -DEF_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130) -DEF_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130) -DEF_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130) - -DEF_LOAD_BROADCAST_IMM (int16_t, 
0x1234, imm_0x1234) -DEF_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234) -DEF_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234) - -DEF_LOAD_BROADCAST_IMM (int16_t, 0xFEDC, imm_0xFEDC) -DEF_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC) -DEF_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC) - -DEF_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678) -DEF_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678) - -DEF_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678) -DEF_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678) - -DEF_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), imm_FEBA716B12371765) - -/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */ -/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */ -/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */ -/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C deleted file mode 100644 index 8e954f3e32c..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C +++ /dev/null @@ -1,64 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ - -#include "sve_ld1r_1.C" - -#include <stdlib.h> -#include <stdio.h> - -#define TEST_LOAD_BROADCAST(TYPE,IMM)\ - {\ - TYPE v[NUM_ELEMS (TYPE)];\ - TYPE temp = 0;\ - set_##TYPE (v, IMM);\ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ - temp += v[i];\ - result += temp;\ - }\ - -#define TEST_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\ - {\ - TYPE v[NUM_ELEMS (TYPE)];\ - TYPE temp = 0;\ - set_##TYPE##SUFFIX (v);\ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ - temp += v[i];\ - result += temp;\ - }\ - -int main (int argc, char **argv) -{ - long long int result = 0; - - TEST_LOAD_BROADCAST_IMM (int16_t, 129, imm_129) - 
TEST_LOAD_BROADCAST_IMM (int32_t, 129, imm_129) - TEST_LOAD_BROADCAST_IMM (int64_t, 129, imm_129) - - TEST_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130) - TEST_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130) - TEST_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130) - - TEST_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234) - TEST_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234) - TEST_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234) - - TEST_LOAD_BROADCAST_IMM (int16_t, int16_t (0xFEDC), imm_0xFEDC) - TEST_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC) - TEST_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC) - - TEST_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678) - TEST_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678) - - TEST_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678) - TEST_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678) - - TEST_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), - imm_FEBA716B12371765) - - if (result != int64_t (6717319005707226880)) - { - fprintf (stderr, "result = %lld\n", result); - abort (); - } - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c new file mode 100644 index 00000000000..89d5f4289de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) + +#define DEF_LOAD_BROADCAST(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + set_##TYPE (TYPE *restrict a, TYPE *restrict b) \ + { \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + a[i] = *b; \ + } + +#define DEF_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \ + void __attribute__ ((noinline, noclone)) \ + set_##TYPE##_##SUFFIX (TYPE *a) \ + { \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + a[i] = IMM; \ + } + +#define FOR_EACH_LOAD_BROADCAST(T) \ + T (int8_t) \ + T (int16_t) \ + 
T (int32_t) \ + T (int64_t) + +#define FOR_EACH_LOAD_BROADCAST_IMM(T) \ + T (int16_t, 129, imm_129) \ + T (int32_t, 129, imm_129) \ + T (int64_t, 129, imm_129) \ + \ + T (int16_t, -130, imm_m130) \ + T (int32_t, -130, imm_m130) \ + T (int64_t, -130, imm_m130) \ + \ + T (int16_t, 0x1234, imm_0x1234) \ + T (int32_t, 0x1234, imm_0x1234) \ + T (int64_t, 0x1234, imm_0x1234) \ + \ + T (int16_t, 0xFEDC, imm_0xFEDC) \ + T (int32_t, 0xFEDC, imm_0xFEDC) \ + T (int64_t, 0xFEDC, imm_0xFEDC) \ + \ + T (int32_t, 0x12345678, imm_0x12345678) \ + T (int64_t, 0x12345678, imm_0x12345678) \ + \ + T (int32_t, 0xF2345678, imm_0xF2345678) \ + T (int64_t, 0xF2345678, imm_0xF2345678) \ + \ + T (int64_t, (int64_t) 0xFEBA716B12371765, imm_FEBA716B12371765) + +FOR_EACH_LOAD_BROADCAST (DEF_LOAD_BROADCAST) +FOR_EACH_LOAD_BROADCAST_IMM (DEF_LOAD_BROADCAST_IMM) + +/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */ +/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c new file mode 100644 index 00000000000..510b2eca517 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2_run.c @@ -0,0 +1,38 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include "sve_ld1r_2.c" + +#define TEST_LOAD_BROADCAST(TYPE) \ + { \ + TYPE v[NUM_ELEMS (TYPE)]; \ + TYPE val = 99; \ + set_##TYPE (v, &val); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + { \ + if (v[i] != (TYPE) 99) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +#define TEST_LOAD_BROADCAST_IMM(TYPE, IMM, SUFFIX) \ + { \ + TYPE v[NUM_ELEMS (TYPE)]; \ + set_##TYPE##_##SUFFIX (v); \ + for (int i = 0; i < 
NUM_ELEMS (TYPE); i++ ) \ + { \ + if (v[i] != (TYPE) IMM) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (int argc, char **argv) +{ + FOR_EACH_LOAD_BROADCAST (TEST_LOAD_BROADCAST) + FOR_EACH_LOAD_BROADCAST_IMM (TEST_LOAD_BROADCAST_IMM) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c index 2d92708fbd2..407d1277c50 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c @@ -1,19 +1,41 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -int -liveloop (int start, int n, int *x) -{ - int i = start; - int j; +#include <stdint.h> - for (j = 0; j < n; ++j) - { - i += 1; - x[j] = i; - } - return i; -} +#define EXTRACT_LAST(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + test_##TYPE (TYPE *x, int n, TYPE value) \ + { \ + TYPE last; \ + for (int j = 0; j < n; ++j) \ + { \ + last = x[j]; \ + x[j] = last * value; \ + } \ + return last; \ + } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Using a fully-masked loop" 1 "vect" } } */ +#define TEST_ALL(T) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (EXTRACT_LAST) + +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].b, } 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].h, } 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].s, } 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7].d, } 4 } } */ + +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], 
z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c index 99f0be353aa..2a1f6df4788 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c @@ -1,29 +1,35 @@ /* { dg-do run { target { aarch64_sve_hw } } } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -extern void abort(void); -#include <string.h> - #include "sve_live_1.c" -#define MAX 62 -#define START 27 - -int main (void) +#define N 107 +#define OP 70 + +#define TEST_LOOP(TYPE) \ + { \ + TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + (i % 3); \ + asm volatile ("" ::: "memory"); \ + } \ + TYPE expected = a[N - 1]; \ + TYPE res = test_##TYPE (a, N, OP); \ + if (res != expected) \ + __builtin_abort (); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE old = i * 2 + (i % 3); \ + if (a[i] != (TYPE) (old * (TYPE) OP)) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) { - int a[MAX]; - int i; - - memset (a, 0, MAX*sizeof (int)); - - int ret = liveloop (START, MAX, a); - - if (ret != 89) - abort (); - - for (i=0; i<MAX; i++) - { - if (a[i] != i+START+1) - abort (); - } -}
\ No newline at end of file + TEST_ALL (TEST_LOOP); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2.c deleted file mode 100644 index 06d95fa8ea6..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ - -int -liveloop (int start, int n, int * __restrict__ x, char * __restrict__ y) -{ - int i = start; - int j; - - for (j = 0; j < n; ++j) - { - i += 1; - x[j] = y[j] + 1; - } - return i; -} - -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Can't use a fully-masked loop because ncopies is greater than 1" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c deleted file mode 100644 index e7924e020cb..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c +++ /dev/null @@ -1,32 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -extern void abort(void); -#include <string.h> -#include <stdio.h> - -#include "sve_live_2.c" - -#define MAX 193 -#define START 84 - -int main (void) -{ - int a[MAX]; - char b[MAX]; - int i; - - memset (a, 0, MAX*sizeof (int)); - memset (b, 23, MAX*sizeof (char)); - - int ret = liveloop (START, MAX, a, b); - - if (ret != 277) - abort (); - - for (i=0; i<MAX; i++) - { - if (a[i] != 24) - abort (); - } -}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c index 0bc757907cf..882da83237e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int64_t v4di __attribute__ ((vector_size (32))); -typedef int32_t v8si __attribute__ ((vector_size (32))); -typedef int16_t v16hi __attribute__ ((vector_size (32))); -typedef int8_t v32qi __attribute__ ((vector_size (32))); +typedef int64_t vnx2di __attribute__ ((vector_size (32))); +typedef int32_t vnx4si __attribute__ ((vector_size (32))); +typedef int16_t vnx8hi __attribute__ ((vector_size (32))); +typedef int8_t vnx16qi __attribute__ ((vector_size (32))); #define TEST_TYPE(TYPE) \ void sve_load_##TYPE##_neg9 (TYPE *a) \ @@ -45,10 +45,10 @@ typedef int8_t v32qi __attribute__ ((vector_size (32))); asm volatile ("" :: "w" (x)); \ } -TEST_TYPE (v4di) -TEST_TYPE (v8si) -TEST_TYPE (v16hi) -TEST_TYPE (v32qi) +TEST_TYPE (vnx2di) +TEST_TYPE (vnx4si) +TEST_TYPE (vnx8hi) +TEST_TYPE (vnx16qi) /* { dg-final { scan-assembler-times {\tsub\tx[0-9]+, x0, #288\n} 4 } } */ /* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c index a0ced0d9be4..78cfc7a9bd8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c @@ -1,11 +1,11 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */ void -f (unsigned int *restrict a, unsigned char *restrict b, int n) +f (unsigned int *restrict a, signed char *restrict b, signed char mask, int n) { for (int i 
= 0; i < n; ++i) - a[i] += b[i]; + a[i] += (signed char) (b[i] | mask); } /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c index 00731d995c8..51732b03784 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c @@ -1,12 +1,7 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */ -void -f (unsigned int *restrict a, unsigned char *restrict b, int n) -{ - for (int i = 0; i < n; ++i) - a[i] += b[i]; -} +#include "sve_load_const_offset_2.c" /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c index 9163702db1d..f1c37d388f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c @@ -3,65 +3,65 @@ #include <stdint.h> -typedef int64_t v4di __attribute__ ((vector_size (32))); -typedef int32_t v8si __attribute__ ((vector_size (32))); -typedef int16_t v16hi __attribute__ ((vector_size (32))); -typedef int8_t v32qi __attribute__ ((vector_size (32))); +typedef int64_t vnx2di __attribute__ ((vector_size (32))); +typedef int32_t vnx4si __attribute__ ((vector_size (32))); +typedef int16_t vnx8hi __attribute__ ((vector_size (32))); +typedef int8_t vnx16qi __attribute__ ((vector_size (32))); void sve_load_64_u_lsl (uint64_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v4di 
*)&a[i])); + asm volatile ("" :: "w" (*(vnx2di *)&a[i])); } void sve_load_64_s_lsl (int64_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v4di *)&a[i])); + asm volatile ("" :: "w" (*(vnx2di *)&a[i])); } void sve_load_32_u_lsl (uint32_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v8si *)&a[i])); + asm volatile ("" :: "w" (*(vnx4si *)&a[i])); } void sve_load_32_s_lsl (int32_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v8si *)&a[i])); + asm volatile ("" :: "w" (*(vnx4si *)&a[i])); } void sve_load_16_z_lsl (uint16_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v16hi *)&a[i])); + asm volatile ("" :: "w" (*(vnx8hi *)&a[i])); } void sve_load_16_s_lsl (int16_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v16hi *)&a[i])); + asm volatile ("" :: "w" (*(vnx8hi *)&a[i])); } void sve_load_8_z (uint8_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v32qi *)&a[i])); + asm volatile ("" :: "w" (*(vnx16qi *)&a[i])); } void sve_load_8_s (int8_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v32qi *)&a[i])); + asm volatile ("" :: "w" (*(vnx16qi *)&a[i])); } /* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c index 2d11a221e93..0f918a4155f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c @@ -10,7 +10,10 @@ { \ TYPE a[N]; \ for (int i = 0; i < N; ++i) \ - a[i] = i * i + i % 5; \ + { \ + a[i] = i * i + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ test_##TYPE##_##NAME (a, BASE, N); \ for (int i = 0; 
i < N; ++i) \ { \ @@ -20,7 +23,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (TEST_LOOP) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c index ccb20b4191f..551b451495d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int64_t v4di __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v32qi) -DO_OP (v16hi) -DO_OP (v8si) -DO_OP (v4di) +DO_OP (vnx16qi) +DO_OP (vnx8hi) +DO_OP (vnx4si) +DO_OP (vnx2di) /* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ /* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c index 4d47bce14fd..469e3c670d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c @@ -1,37 +1,52 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ #include <stdint.h> -#define INVALID_INDEX(TYPE) ((TYPE) 107) -#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE)) -#define ODD(VAL) (VAL & 0x1) - -/* TODO: 
This is a bit ugly for floating point types as it involves FP<>INT - conversions, but I can't find another way of auto-vectorizing the code to - make use of SVE gather instructions. */ -#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ -void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\ - LOOKUPTYPE *__restrict lookup,\ - INDEXTYPE *__restrict index, int n)\ -{\ - int i;\ - for (i = 0; i < n; ++i)\ - {\ - INDEXTYPE x = index[i];\ - if (IS_VALID_INDEX (INDEXTYPE, x))\ - x = lookup[x];\ - out[i] = x;\ - }\ -}\ - -DEF_MASK_GATHER_LOAD (int32_t, int32_t, int32_t) -DEF_MASK_GATHER_LOAD (int64_t, int64_t, int64_t) -DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t) -DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t) -DEF_MASK_GATHER_LOAD (float, float, int32_t) -DEF_MASK_GATHER_LOAD (double, double, int64_t) - -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 1 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 2 } } */ +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cmp1[i] == cmp2[i]) \ + dest[i] += src[indices[i]]; \ + } + +#define TEST32(T, DATA_TYPE) \ + T (DATA_TYPE, int32_t, 32) \ + T (DATA_TYPE, uint32_t, 32) \ + T (DATA_TYPE, float, 32) + +#define TEST64(T, DATA_TYPE) \ + T (DATA_TYPE, int64_t, 64) \ + T (DATA_TYPE, uint64_t, 64) \ + T (DATA_TYPE, double, 64) + +#define TEST_ALL(T) \ + TEST32 (T, int32_t) \ + TEST32 (T, uint32_t) \ + TEST32 (T, float) \ + TEST64 (T, int64_t) \ + TEST64 (T, uint64_t) \ + TEST64 
(T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c deleted file mode 100644 index 89ccf3e35a4..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c +++ /dev/null @@ -1,72 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include "sve_mask_gather_load_1.c" - -#include <stdio.h> - -extern void abort (); - -/* TODO: Support widening forms of gather loads and test them here. 
*/ - -#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) - -#define INDEX_VEC_INIT(INDEXTYPE)\ - INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (INDEXTYPE)];\ - -#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ - LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (LOOKUPTYPE)];\ - OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\ - {\ - int i;\ - for (i = 0; i < NUM_ELEMS (INDEXTYPE); i++)\ - {\ - lookup_##LOOKUPTYPE [i] = i * 2;\ - index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\ - }\ - } - -#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ - fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\ - (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\ - NUM_ELEMS (INDEXTYPE));\ - {\ - int i;\ - for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ - {\ - if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\ - break;\ - else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (INDEXTYPE))\ - break;\ - }\ - if (i < NUM_ELEMS (OUTTYPE))\ - {\ - fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\ - i, (int) out_##OUTTYPE[i]);\ - abort ();\ - }\ - } - -int main() -{ - INDEX_VEC_INIT (int32_t) - INDEX_VEC_INIT (int64_t) - INDEX_VEC_INIT (uint32_t) - INDEX_VEC_INIT (uint64_t) - - VEC_INIT (int32_t, int32_t, int32_t) - VEC_INIT (int64_t, int64_t, int64_t) - VEC_INIT (uint32_t, uint32_t, uint32_t) - VEC_INIT (uint64_t, uint64_t, uint64_t) - VEC_INIT (float, float, int32_t) - VEC_INIT (double, double, int64_t) - - TEST_MASK_GATHER_LOAD (int32_t, int32_t, int32_t) - TEST_MASK_GATHER_LOAD (int64_t, int64_t, int64_t) - TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t) - TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t) - TEST_MASK_GATHER_LOAD (float, float, int32_t) - TEST_MASK_GATHER_LOAD (double, double, int64_t) - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c index 48db58ffefd..8dd48462b51 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c @@ -1,60 +1,19 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ -#include <stdint.h> +#define INDEX32 uint32_t +#define INDEX64 uint64_t -#define NUM_ELEMS(TYPE) (4 * (32 / sizeof (TYPE))) -#define INVALID_INDEX(TYPE) ((TYPE) 107) -#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE)) +#include "sve_mask_gather_load_1.c" -/* TODO: This is a bit ugly for floating point types as it involves FP<>INT - conversions, but I can't find another way of auto-vectorizing the code to - make use of SVE gather instructions. */ -#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ -void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\ - LOOKUPTYPE *__restrict lookup,\ - INDEXTYPE *__restrict index, INDEXTYPE n)\ -{\ - INDEXTYPE i;\ - for (i = 0; i < n; ++i)\ - {\ - LOOKUPTYPE x = index[i];\ - if (IS_VALID_INDEX (LOOKUPTYPE, x))\ - x = lookup[x];\ - out[i] = x;\ - }\ -}\ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */ -DEF_MASK_GATHER_LOAD (int32_t, int32_t, int8_t) -DEF_MASK_GATHER_LOAD (int64_t, int64_t, int8_t) -DEF_MASK_GATHER_LOAD (int32_t, int32_t, int16_t) -DEF_MASK_GATHER_LOAD (int64_t, int64_t, int16_t) -DEF_MASK_GATHER_LOAD (int64_t, int64_t, int32_t) -DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t) -DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t) 
-DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t) -DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t) -DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t) - -/* At present we only use predicate unpacks when the index type is - half the size of the result type. */ -/* { dg-final { scan-assembler-times "\tpunpklo\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */ -/* { dg-final { scan-assembler-times "\tpunpkhi\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */ - -/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ -/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ -/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ -/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ -/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ -/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ - -/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ -/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ -/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ -/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ -/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ -/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ - -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 6 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 6 } } */ -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 28 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { 
scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c deleted file mode 100644 index c5280546206..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c +++ /dev/null @@ -1,98 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include "sve_mask_gather_load_2.c" - -#include <stdio.h> - -extern void abort (); - -#define ODD(VAL) (VAL & 0x1) -#define INDEX_VEC_INIT(INDEXTYPE)\ - INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (int8_t)];\ - -#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ - LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (OUTTYPE)];\ - OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\ - {\ - int i;\ - for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ - {\ - lookup_##LOOKUPTYPE [i] = i * 2;\ - index_##INDEXTYPE [i] = ODD (i) ? 
i : INVALID_INDEX (INDEXTYPE);\ - }\ - } - -#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ - fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\ - (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\ - NUM_ELEMS (OUTTYPE));\ - {\ - int i;\ - for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ - {\ - if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\ - break;\ - else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (OUTTYPE))\ - break;\ - }\ - if (i < NUM_ELEMS (OUTTYPE))\ - {\ - fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\ - i, (int) out_##OUTTYPE[i]);\ - abort ();\ - }\ - } - -int main() -{ - INDEX_VEC_INIT (int8_t) - INDEX_VEC_INIT (int16_t) - INDEX_VEC_INIT (int32_t) - INDEX_VEC_INIT (uint8_t) - INDEX_VEC_INIT (uint16_t) - INDEX_VEC_INIT (uint32_t) - - { - VEC_INIT (int32_t, int32_t, int8_t) - TEST_MASK_GATHER_LOAD (int32_t, int32_t, int8_t) - } - { - VEC_INIT (int64_t, int64_t, int8_t) - TEST_MASK_GATHER_LOAD (int64_t, int64_t, int8_t) - } - { - VEC_INIT (int32_t, int32_t, int16_t) - TEST_MASK_GATHER_LOAD (int32_t, int32_t, int16_t) - } - { - VEC_INIT (int64_t, int64_t, int16_t) - TEST_MASK_GATHER_LOAD (int64_t, int64_t, int16_t) - } - { - VEC_INIT (int64_t, int64_t, int32_t) - TEST_MASK_GATHER_LOAD (int64_t, int64_t, int32_t) - } - { - VEC_INIT (uint32_t, uint32_t, uint8_t) - TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t) - } - { - VEC_INIT (uint64_t, uint64_t, uint8_t) - TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t) - } - { - VEC_INIT (uint32_t, uint32_t, uint16_t) - TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t) - } - { - VEC_INIT (uint64_t, uint64_t, uint16_t) - TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t) - } - { - VEC_INIT (uint64_t, uint64_t, uint32_t) - TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t) - } - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c index 2965760e058..b370f532f2c 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c @@ -1,29 +1,52 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ -#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ - DATA_TYPE __attribute__ ((noinline)) \ - NAME (char *data, INDEX_TYPE *indices, signed char n) \ +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n) \ { \ - DATA_TYPE sum = 0; \ - for (signed char i = 0; i < n; ++i) \ - { \ - INDEX_TYPE index = indices[i]; \ - sum += (index & 16 ? *(DATA_TYPE *) (data + index) : 1); \ - } \ - return sum; \ + for (int i = 0; i < n; ++i) \ + if (cmp1[i] == cmp2[i]) \ + dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]); \ } -TEST_LOOP (f_s32, int, unsigned int) -TEST_LOOP (f_u32, unsigned int, unsigned int) -TEST_LOOP (f_f32, float, unsigned int) +#define TEST32(T, DATA_TYPE) \ + T (DATA_TYPE, int32_t, 32) \ + T (DATA_TYPE, uint32_t, 32) \ + T (DATA_TYPE, float, 32) + +#define TEST64(T, DATA_TYPE) \ + T (DATA_TYPE, int64_t, 64) \ + T (DATA_TYPE, uint64_t, 64) \ + T (DATA_TYPE, double, 64) + +#define TEST_ALL(T) \ + TEST32 (T, int32_t) \ + TEST32 (T, uint32_t) \ + TEST32 (T, float) \ + TEST64 (T, int64_t) \ + TEST64 (T, uint64_t) \ + TEST64 (T, double) + +TEST_ALL (TEST_LOOP) -TEST_LOOP (f_s64_s64, long, long) -TEST_LOOP (f_s64_u64, long, unsigned long) -TEST_LOOP (f_u64_s64, unsigned long, long) -TEST_LOOP (f_u64_u64, unsigned long, unsigned long) -TEST_LOOP (f_f64_s64, double, long) -TEST_LOOP (f_f64_u64, double, unsigned long) +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times 
{\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 3 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c deleted file mode 100644 index aa73c81ffca..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ - -#include "sve_mask_gather_load_3.c" - -extern void abort (void); - -#define N 57 - -#undef TEST_LOOP -#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ - { \ - INDEX_TYPE indices[N]; \ - DATA_TYPE data[N * 2]; \ - for (int i = 0; i < N * 2; ++i) \ - data[i] = (i / 2) * 4 + i % 2; \ - DATA_TYPE sum = 0; \ - for (int i = 0; i < N; ++i) \ - { \ - INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \ - j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \ 
- if (j & 16) \ - sum += data[j / sizeof (DATA_TYPE)]; \ - else \ - sum += 1; \ - indices[i] = j; \ - } \ - DATA_TYPE res = NAME ((char *) data, indices, N); \ - if (res != sum) \ - abort (); \ - } - -int __attribute__ ((optimize (1))) -main () -{ - TEST_LOOP (f_s32, int, unsigned int) - TEST_LOOP (f_u32, unsigned int, unsigned int) - TEST_LOOP (f_f32, float, unsigned int) - - TEST_LOOP (f_s64_s64, long, long) - TEST_LOOP (f_s64_u64, long, unsigned long) - TEST_LOOP (f_u64_s64, unsigned long, long) - TEST_LOOP (f_u64_u64, unsigned long, unsigned long) - TEST_LOOP (f_f64_s64, double, long) - TEST_LOOP (f_f64_u64, double, unsigned long) - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c index 38bb5275e59..0464e9343a3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c @@ -1,18 +1,19 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ -#define TEST_LOOP(NAME, TYPE) \ - TYPE __attribute__ ((noinline)) \ - NAME (TYPE **indices, long *mask, int n) \ - { \ - TYPE sum = 0; \ - for (int i = 0; i < n; ++i) \ - sum += mask[i] ? 
*indices[i] : 1; \ - return sum; \ - } +#define INDEX32 uint32_t +#define INDEX64 uint64_t -TEST_LOOP (f_s64, long) -TEST_LOOP (f_u64, unsigned long) -TEST_LOOP (f_f64, double) +#include "sve_mask_gather_load_3.c" -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c deleted file mode 100644 index 8a6320a002c..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c +++ /dev/null @@ -1,37 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ - -#include "sve_mask_gather_load_4.c" - -extern void abort (void); - -#define N 57 - -#undef TEST_LOOP -#define TEST_LOOP(NAME, TYPE) \ - { \ - TYPE *ptrs[N]; \ - TYPE data[N * 2]; \ - long mask[N]; \ - 
for (int i = 0; i < N * 2; ++i) \ - data[i] = (i / 2) * 4 + i % 2; \ - TYPE sum = 0; \ - for (int i = 0; i < N; ++i) \ - { \ - mask[i] = i & 15; \ - ptrs[i] = &data[i * 3 / 2]; \ - sum += mask[i] ? *ptrs[i] : 1; \ - } \ - TYPE res = NAME (ptrs, mask, N); \ - if (res != sum) \ - abort (); \ - } - -int __attribute__ ((optimize (1))) -main () -{ - TEST_LOOP (f_s64, long) - TEST_LOOP (f_u64, unsigned long) - TEST_LOOP (f_f64, double) - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c index abb38e40f72..831d594654a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c @@ -1,120 +1,38 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ - -#define MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -void mgather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * STRIDE])\ - dst[i] = src[i * STRIDE];\ -} - -#define MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mgather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * stride])\ - dst[i] = src[i * stride];\ -} - -#define MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mgather_load3s5##OBJTYPE##STRIDETYPE\ - (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\ - OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\ - MASKTYPE * restrict masks, STRIDETYPE count)\ -{\ - const STRIDETYPE STRIDE = 5;\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * STRIDE])\ - {\ - d1[i] = src[0 + (i * STRIDE)];\ - d2[i] = src[1 + (i * STRIDE)];\ - d3[i] = src[2 + 
(i * STRIDE)];\ - d4[i] = src[3 + (i * STRIDE)];\ - d5[i] = src[4 + (i * STRIDE)];\ - }\ -} - -#define MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -void mgather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - if (masks[i * STRIDE])\ - *dst = *src;\ - dst += 1;\ - src += STRIDE;\ - }\ -} - -#define MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mgather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - if (masks[i * stride])\ - *dst = *src;\ - dst += 1;\ - src += stride;\ - }\ -} - -MASK_GATHER_LOAD1 (double, long, long, 5) -MASK_GATHER_LOAD1 (double, long, long, 8) -MASK_GATHER_LOAD1 (double, long, long, 21) -MASK_GATHER_LOAD1 (double, long, long, 1009) - -MASK_GATHER_LOAD1 (float, int, int, 5) -MASK_GATHER_LOAD1 (float, int, int, 8) -MASK_GATHER_LOAD1 (float, int, int, 21) -MASK_GATHER_LOAD1 (float, int, int, 1009) - -MASK_GATHER_LOAD2 (double, long, long) -MASK_GATHER_LOAD2 (float, int, int) - -MASK_GATHER_LOAD3 (double, long, long) -MASK_GATHER_LOAD3 (float, int, int) - -MASK_GATHER_LOAD4 (double, long, long, 5) - -/* NOTE: We can't vectorize MASK_GATHER_LOAD4 (float, int, int, 5) because we - can't prove that the offsets used for the gather load won't overflow. */ - -MASK_GATHER_LOAD5 (double, long, long) -MASK_GATHER_LOAD5 (float, int, int) - -/* Widened forms. 
*/ -MASK_GATHER_LOAD1 (double, long, int, 5) -MASK_GATHER_LOAD1 (double, long, int, 8) -MASK_GATHER_LOAD1 (double, long, short, 5) -MASK_GATHER_LOAD1 (double, long, short, 8) - -MASK_GATHER_LOAD1 (float, int, short, 5) -MASK_GATHER_LOAD1 (float, int, short, 8) - -MASK_GATHER_LOAD2 (double, long, int) -MASK_GATHER_LOAD2 (float, int, short) - -MASK_GATHER_LOAD4 (double, long, int, 5) -MASK_GATHER_LOAD4 (float, int, short, 5) - -MASK_GATHER_LOAD5 (double, long, int) - -/* Loads including masks. */ -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 34 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 20 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 6 } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict *restrict src, \ + CMP_TYPE *cmp1, CMP_TYPE *cmp2, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cmp1[i] == cmp2[i]) \ + dest[i] += *src[i]; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, int64_t) \ + T (DATA_TYPE, uint64_t) \ + T (DATA_TYPE, double) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]\n} 9 } } */ +/* { 
dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c deleted file mode 100644 index 445c47f23ac..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c +++ /dev/null @@ -1,177 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <unistd.h> - -extern void abort (void); -extern void *memset(void *, int, size_t); - -#include "sve_mask_gather_load_5.c" - -#define NUM_DST_ELEMS 13 -#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE) - -#define MASKED_VALUE 3 - -#define TEST_MASK_GATHER_LOAD_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ - real_dst[0] = 0;\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - {\ - src[i * STRIDE] = i;\ - dst[i] = MASKED_VALUE;\ - masks[i * STRIDE] = i & 0x1;\ - }\ - FUN##OBJTYPE##STRIDETYPE##STRIDE \ - (dst, src, masks, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - if (dst[i] != (masks[i * STRIDE] ? 
i : MASKED_VALUE))\ - abort ();\ -} - -#define TEST_MASK_GATHER_LOAD_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ - real_dst[0] = 0;\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - {\ - src[i * STRIDE] = i;\ - dst[i] = MASKED_VALUE;\ - masks[i * STRIDE] = i & 0x1;\ - }\ - FUN##OBJTYPE##STRIDETYPE \ - (dst, src, masks, STRIDE, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\ - abort ();\ -} - -#define TEST_MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load1, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load2, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_SRC_ELEMS (5)];\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\ - memset (masks, 0, (NUM_SRC_ELEMS (5)) * sizeof (MASKTYPE));\ - real_dst1[0] = real_dst2[0] = real_dst3[0] = real_dst4[0] = real_dst5[0] = 0;\ - OBJTYPE *src = 
&real_src[1];\ - OBJTYPE *dst1 = &real_dst1[1];\ - OBJTYPE *dst2 = &real_dst2[1];\ - OBJTYPE *dst3 = &real_dst3[1];\ - OBJTYPE *dst4 = &real_dst4[1];\ - OBJTYPE *dst5 = &real_dst5[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\ - src[i] = i;\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - {\ - dst1[i] = MASKED_VALUE;\ - dst2[i] = MASKED_VALUE;\ - dst3[i] = MASKED_VALUE;\ - dst4[i] = MASKED_VALUE;\ - dst5[i] = MASKED_VALUE;\ - masks[i * 5] = i & 0x1;\ - }\ - mgather_load3s5##OBJTYPE##STRIDETYPE \ - (dst1, dst2, dst3, dst4, dst5, src, masks, NUM_DST_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ - {\ - STRIDETYPE base = i * 5;\ - if (dst1[i] != (masks[base] ? base : MASKED_VALUE))\ - abort ();\ - if (dst2[i] != (masks[base] ? (base + 1) : MASKED_VALUE))\ - abort ();\ - if (dst3[i] != (masks[base] ? (base + 2) : MASKED_VALUE))\ - abort ();\ - if (dst4[i] != (masks[base] ? (base + 3) : MASKED_VALUE))\ - abort ();\ - if (dst5[i] != (masks[base] ? (base + 4) : MASKED_VALUE))\ - abort ();\ - }\ -} - -#define TEST_MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load4, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load5, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -int main () -{ - TEST_MASK_GATHER_LOAD1 (double, long, long, 5); - TEST_MASK_GATHER_LOAD1 (double, long, long, 8); - TEST_MASK_GATHER_LOAD1 (double, long, long, 21); - - TEST_MASK_GATHER_LOAD1 (float, int, int, 5); - TEST_MASK_GATHER_LOAD1 (float, int, int, 8); - TEST_MASK_GATHER_LOAD1 (float, int, int, 21); - - TEST_MASK_GATHER_LOAD2 (double, long, long, 5); - TEST_MASK_GATHER_LOAD2 (double, long, long, 8); - TEST_MASK_GATHER_LOAD2 (double, long, long, 21); - - TEST_MASK_GATHER_LOAD3 (double, long, long); - TEST_MASK_GATHER_LOAD3 (float, int, int); - - TEST_MASK_GATHER_LOAD4 (double, long, long, 5); - - TEST_MASK_GATHER_LOAD5 
(double, long, long, 5); - TEST_MASK_GATHER_LOAD5 (float, int, int, 5); - - /* Widened forms. */ - TEST_MASK_GATHER_LOAD1 (double, long, int, 5) - TEST_MASK_GATHER_LOAD1 (double, long, int, 8) - TEST_MASK_GATHER_LOAD1 (double, long, short, 5) - TEST_MASK_GATHER_LOAD1 (double, long, short, 8) - - TEST_MASK_GATHER_LOAD1 (float, int, short, 5) - TEST_MASK_GATHER_LOAD1 (float, int, short, 8) - - TEST_MASK_GATHER_LOAD2 (double, long, int, 5); - TEST_MASK_GATHER_LOAD2 (double, long, int, 8); - TEST_MASK_GATHER_LOAD2 (double, long, int, 21); - - TEST_MASK_GATHER_LOAD4 (double, long, int, 5); - TEST_MASK_GATHER_LOAD4 (float, int, short, 5); - - TEST_MASK_GATHER_LOAD5 (double, long, int, 5); - - return 0; -} - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c new file mode 100644 index 00000000000..64eb0c46278 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_6.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cmp1[i] == cmp2[i]) \ + dest[i] += src[indices[i]]; \ + } + +#define TEST32(T, DATA_TYPE) \ + T (DATA_TYPE, int64_t, int32_t) \ + T (DATA_TYPE, uint64_t, int32_t) \ + T (DATA_TYPE, double, int32_t) \ + T (DATA_TYPE, int64_t, uint32_t) \ + T (DATA_TYPE, uint64_t, uint32_t) \ + T (DATA_TYPE, double, uint32_t) + +#define TEST_ALL(T) \ + TEST32 (T, int32_t) \ + TEST32 (T, uint32_t) \ + TEST32 (T, float) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 72 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, 
p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 18 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c new file mode 100644 index 00000000000..4a8b38e13af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_7.c @@ -0,0 +1,53 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cmp1[i] == cmp2[i]) \ + dest[i] += src[indices[i]]; \ + } + +#define TEST32(T, DATA_TYPE) \ + T (DATA_TYPE, int16_t, int32_t) \ + T (DATA_TYPE, uint16_t, int32_t) \ + T (DATA_TYPE, _Float16, int32_t) \ + T (DATA_TYPE, int16_t, uint32_t) \ + T (DATA_TYPE, uint16_t, uint32_t) \ + T (DATA_TYPE, _Float16, uint32_t) + +#define TEST64(T, DATA_TYPE) \ + T (DATA_TYPE, int32_t, int64_t) \ + T (DATA_TYPE, uint32_t, int64_t) \ + T (DATA_TYPE, float, int64_t) \ + T (DATA_TYPE, int32_t, uint64_t) \ + T (DATA_TYPE, uint32_t, uint64_t) \ + T (DATA_TYPE, float, uint64_t) + +#define TEST_ALL(T) \ + TEST32 (T, int32_t) \ + TEST32 (T, uint32_t) \ + TEST32 (T, float) \ + TEST64 (T, int64_t) \ + TEST64 (T, uint64_t) \ + 
TEST64 (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 18 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 18 } } */ + +/* Also used for the TEST32 indices. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 72 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 36 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c index a7f2995a6cd..562bdb720de 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c @@ -1,124 +1,51 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ - -#define MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -void mscatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * STRIDE])\ - dst[i * STRIDE] = src[i];\ -} - -#define MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mscatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict 
masks,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * stride])\ - dst[i * stride] = src[i];\ -} - -#define MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mscatter_store3s5##OBJTYPE##STRIDETYPE\ - (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\ - OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\ - MASKTYPE * restrict masks, STRIDETYPE count)\ -{\ - const STRIDETYPE STRIDE = 5;\ - for (STRIDETYPE i=0; i<count; i++)\ - if (masks[i * STRIDE])\ - {\ - dst[0 + (i * STRIDE)] = s1[i];\ - dst[1 + (i * STRIDE)] = s2[i];\ - dst[2 + (i * STRIDE)] = s3[i];\ - dst[3 + (i * STRIDE)] = s4[i];\ - dst[4 + (i * STRIDE)] = s5[i];\ - }\ -} - -#define MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -void mscatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - if (masks[i * STRIDE])\ - *dst = *src;\ - dst += STRIDE;\ - src += 1;\ - }\ -} - -#define MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE)\ -void mscatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - MASKTYPE * restrict masks,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - if (masks[i * stride])\ - *dst = *src;\ - dst += stride;\ - src += 1;\ - }\ -} - -MASK_SCATTER_STORE1 (double, long, long, 5) -MASK_SCATTER_STORE1 (double, long, long, 8) -MASK_SCATTER_STORE1 (double, long, long, 21) -MASK_SCATTER_STORE1 (double, long, long, 1009) - -MASK_SCATTER_STORE1 (float, int, int, 5) - -MASK_SCATTER_STORE1 (float, int, int, 8) -MASK_SCATTER_STORE1 (float, int, int, 21) -MASK_SCATTER_STORE1 (float, int, int, 1009) - -MASK_SCATTER_STORE2 (double, long, long) -MASK_SCATTER_STORE2 (float, int, int) - -MASK_SCATTER_STORE3 (double, long, long) -MASK_SCATTER_STORE3 (float, int, int) - -MASK_SCATTER_STORE4 
(double, long, long, 5) -/* NOTE: We can't vectorize MASK_SCATTER_STORE4 (float, int, int, 3) because we - can't prove that the offsets used for the gather load won't overflow. */ - -MASK_SCATTER_STORE5 (double, long, long) -MASK_SCATTER_STORE5 (float, int, int) - -/* Widened forms. */ -MASK_SCATTER_STORE1 (double, long, int, 5) -MASK_SCATTER_STORE1 (double, long, int, 8) -MASK_SCATTER_STORE1 (double, long, short, 5) -MASK_SCATTER_STORE1 (double, long, short, 8) - -MASK_SCATTER_STORE1 (float, int, short, 5) -MASK_SCATTER_STORE1 (float, int, short, 8) - -MASK_SCATTER_STORE2 (double, long, int) -MASK_SCATTER_STORE2 (float, int, short) - -MASK_SCATTER_STORE4 (double, long, int, 5) -MASK_SCATTER_STORE4 (float, int, short, 5) - -MASK_SCATTER_STORE5 (double, long, int) - -/* Gather loads are for the masks. */ -/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 15 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 8 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ - -/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS) \ + void \ + f_##DATA_TYPE##_##CMP_TYPE \ + (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + CMP_TYPE *restrict cmp1, CMP_TYPE *restrict cmp2, \ + INDEX##BITS *restrict indices, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if 
(cmp1[i] == cmp2[i]) \ + dest[indices[i]] = src[i] + 1; \ + } + +#define TEST32(T, DATA_TYPE) \ + T (DATA_TYPE, int32_t, 32) \ + T (DATA_TYPE, uint32_t, 32) \ + T (DATA_TYPE, float, 32) + +#define TEST64(T, DATA_TYPE) \ + T (DATA_TYPE, int64_t, 64) \ + T (DATA_TYPE, uint64_t, 64) \ + T (DATA_TYPE, double, 64) + +#define TEST_ALL(T) \ + TEST32 (T, int32_t) \ + TEST32 (T, uint32_t) \ + TEST32 (T, float) \ + TEST64 (T, int64_t) \ + TEST64 (T, uint64_t) \ + TEST64 (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c deleted file mode 100644 index 3222d420763..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c +++ /dev/null @@ -1,186 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <unistd.h> -#include <stdio.h> - -extern void abort (void); -extern void *memset(void *, int, size_t); - -#include "sve_mask_scatter_store_1.c" - -#define NUM_SRC_ELEMS 13 -#define 
NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE) - -#define MASKED_VALUE 3 - -#define TEST_MASK_SCATTER_STORE_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\ - memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ - real_src[0] = 0;\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - {\ - src[i] = i;\ - masks[i * STRIDE] = i & 0x1;\ - }\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\ - dst[i] = MASKED_VALUE;\ - FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, masks, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\ - abort ();\ -} - -#define TEST_MASK_SCATTER_STORE_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\ - memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ - real_src[0] = 0;\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - {\ - src[i] = i;\ - masks[i * STRIDE] = i & 0x1;\ - }\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\ - dst[i] = MASKED_VALUE;\ - FUN##OBJTYPE##STRIDETYPE (dst, src, masks, STRIDE, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - if (dst[i * STRIDE] != (masks[i * STRIDE] ? 
i : MASKED_VALUE))\ - abort ();\ -} - -#define TEST_MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store1, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store2, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\ -{\ - OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\ - __attribute__((aligned (32)));\ - MASKTYPE masks[NUM_DST_ELEMS (5)];\ - memset (masks, 0, (NUM_DST_ELEMS (5)) * sizeof (MASKTYPE));\ - real_src1[0] = real_src2[0] = real_src3[0] = real_src4[0] = real_src5[0] = 0;\ - OBJTYPE *src1 = &real_src1[1];\ - OBJTYPE *src2 = &real_src2[1];\ - OBJTYPE *src3 = &real_src3[1];\ - OBJTYPE *src4 = &real_src4[1];\ - OBJTYPE *src5 = &real_src5[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - {\ - STRIDETYPE base = i * 5;\ - src1[i] = base;\ - src2[i] = base + 1;\ - src3[i] = base + 2;\ - src4[i] = base + 3;\ - src5[i] = base + 4;\ - masks[i * 5] = i & 0x1;\ - }\ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\ - dst[i] = MASKED_VALUE;\ - mscatter_store3s5##OBJTYPE##STRIDETYPE \ - (dst, src1, src2, src3, src4, src5, masks, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - {\ - STRIDETYPE base = i * 5;\ - if (dst[base] != (masks[i * 5] ? base : MASKED_VALUE))\ - abort ();\ - if (dst[base + 1] != (masks[i * 5] ? (base + 1) : MASKED_VALUE))\ - abort ();\ - if (dst[base + 2] != (masks[i * 5] ? 
(base + 2) : MASKED_VALUE))\ - abort ();\ - if (dst[base + 3] != (masks[i * 5] ? (base + 3) : MASKED_VALUE))\ - abort ();\ - if (dst[base + 4] != (masks[i * 5] ? (base + 4) : MASKED_VALUE))\ - abort ();\ - }\ -} - -#define TEST_MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store4, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -#define TEST_MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ - TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store5, OBJTYPE, MASKTYPE, \ - STRIDETYPE, STRIDE) - -int __attribute__ ((optimize (1))) -main () -{ - TEST_MASK_SCATTER_STORE1 (double, long, long, 5); - - TEST_MASK_SCATTER_STORE1 (double, long, long, 8); - TEST_MASK_SCATTER_STORE1 (double, long, long, 21); - - TEST_MASK_SCATTER_STORE1 (float, int, int, 5); - TEST_MASK_SCATTER_STORE1 (float, int, int, 8); - TEST_MASK_SCATTER_STORE1 (float, int, int, 21); - - TEST_MASK_SCATTER_STORE2 (double, long, long, 5); - TEST_MASK_SCATTER_STORE2 (double, long, long, 8); - TEST_MASK_SCATTER_STORE2 (double, long, long, 21); - - TEST_MASK_SCATTER_STORE2 (float, int, int, 5); - TEST_MASK_SCATTER_STORE2 (float, int, int, 8); - TEST_MASK_SCATTER_STORE2 (float, int, int, 21); - - TEST_MASK_SCATTER_STORE3 (double, long, long); - TEST_MASK_SCATTER_STORE3 (float, int, int); - - TEST_MASK_SCATTER_STORE4 (double, long, long, 5); - - TEST_MASK_SCATTER_STORE5 (double, long, long, 5); - TEST_MASK_SCATTER_STORE5 (float, int, int, 5); - - /* Widened forms. 
*/ - TEST_MASK_SCATTER_STORE1 (double, long, int, 5) - TEST_MASK_SCATTER_STORE1 (double, long, int, 8) - TEST_MASK_SCATTER_STORE1 (double, long, short, 5) - TEST_MASK_SCATTER_STORE1 (double, long, short, 8) - - TEST_MASK_SCATTER_STORE1 (float, int, short, 5) - TEST_MASK_SCATTER_STORE1 (float, int, short, 8) - - TEST_MASK_SCATTER_STORE2 (double, long, int, 5); - TEST_MASK_SCATTER_STORE2 (double, long, int, 8); - TEST_MASK_SCATTER_STORE2 (double, long, int, 21); - - TEST_MASK_SCATTER_STORE2 (float, int, short, 5); - TEST_MASK_SCATTER_STORE2 (float, int, short, 8); - TEST_MASK_SCATTER_STORE2 (float, int, short, 21); - - TEST_MASK_SCATTER_STORE4 (double, long, int, 5); - TEST_MASK_SCATTER_STORE4 (float, int, short, 5); - - TEST_MASK_SCATTER_STORE5 (double, long, int, 5); - - return 0; -} - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c new file mode 100644 index 00000000000..c0f291673dc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_2.c @@ -0,0 +1,17 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve --save-temps" } */ + +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_mask_scatter_store_1.c" + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, 
p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c index 4a6247db978..9eff539c1d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -44,10 +45,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ Out 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for half float) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c index 626b78c29e1..72086145290 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_load_1.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,6 +15,7 @@ volatile int x; { \ out[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 2; ++i) \ in[i] = i * 9 / 2; \ @@ -27,11 +26,11 @@ volatile int x; OUTTYPE if_false = i * 7 / 2; \ if (out[i] != (mask[i] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c index 0004e673d49..fe69b96e35a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -30,6 +30,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -46,10 +47,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ Out 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for _Float16) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c index 86219b4a191..a9784676efb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_load_2.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,6 +15,7 @@ volatile int x; { \ out[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 3; ++i) \ in[i] = i * 9 / 2; \ @@ -29,11 +28,11 @@ volatile int x; OUTTYPE if_false = i * 7 / 2; \ if (out[i] != (mask[i] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c index 5f784e7dd36..b8bdd51459f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -31,6 +31,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -47,10 +48,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ Out 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for half float) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c index 51bd38e2890..f168d656af9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_load_3.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,6 +15,7 @@ volatile int x; { \ out[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 4; ++i) \ in[i] = i * 9 / 2; \ @@ -30,11 +29,11 @@ volatile int x; OUTTYPE if_false = i * 7 / 2; \ if (out[i] != (mask[i] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c index 6608558d3ff..2b319229d1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void \ + void __attribute__ ((noinline, noclone)) \ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -44,10 +45,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ Out 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for half float) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c index 003cf650d7d..a81c647004f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void \ + void __attribute__ ((noinline, noclone)) \ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -44,10 +45,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ Out 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for half float) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c index a6161f31536..b6e3f55d7e8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void \ + void __attribute__ ((noinline, noclone)) \ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c index 75a3e43f267..da97e2795a9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void \ + void __attribute__ ((noinline, noclone)) \ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, 
_Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c index e87ad0bc074..c3884b0b074 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void \ + void __attribute__ ((noinline, noclone)) \ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -28,6 +28,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c index 966968d4b91..9af479f478d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c @@ -2,16 +2,19 @@ /* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ - MASKTYPE *__restrict cond, int n) \ + MASKTYPE *__restrict cond, INTYPE bias, int n) \ { \ for (int i = 0; i < n; ++i) \ - if (cond[i]) \ - { \ - dest[i * 2] = src[i]; \ - dest[i * 2 + 1] = src[i]; \ - } \ + { \ + INTYPE value = src[i] + bias; \ + if (cond[i]) \ + { \ + dest[i * 2] = value; \ + dest[i * 2 + 1] = value; \ + } \ + } \ } #define TEST2(NAME, OUTTYPE, 
INTYPE) \ @@ -31,6 +34,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -47,10 +51,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ In 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for _Float16) 32 | 2 1 1 1 64 | 2 1 1 1. */ -/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c index fd48a4c96f9..f472e1da01d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_store_1.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,21 +15,22 @@ volatile int x; { \ in[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 2; ++i) \ out[i] = i * 9 / 2; \ - NAME##_2 (out, in, mask, N); \ + NAME##_2 (out, in, mask, 17, N); \ for (int i = 0; i < N * 2; ++i) \ { \ - OUTTYPE if_true = in[i / 2]; \ + OUTTYPE if_true = (INTYPE) (in[i / 2] + 17); \ OUTTYPE if_false = i * 9 / 2; \ if (out[i] != (mask[i / 2] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c index 5359c6a457a..b817a095abe 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c @@ -2,17 +2,20 @@ /* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ - MASKTYPE *__restrict cond, int n) \ + MASKTYPE *__restrict cond, INTYPE bias, int n) \ { \ for (int i = 0; i < n; ++i) \ - if (cond[i]) \ - { \ - dest[i * 3] = src[i]; \ - dest[i * 3 + 1] = src[i]; \ - dest[i * 3 + 2] = src[i]; \ - } \ + { \ + INTYPE value = src[i] + bias; \ + if (cond[i]) \ + { \ + dest[i * 3] = value; \ + dest[i * 3 + 1] = value; \ + dest[i * 3 + 2] = value; \ + } \ + } \ } #define TEST2(NAME, OUTTYPE, INTYPE) \ @@ -32,6 +35,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -48,10 +52,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ In 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for _Float16) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c index f8845ebd7ec..c1771d52298 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_store_2.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,21 +15,22 @@ volatile int x; { \ in[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 3; ++i) \ out[i] = i * 9 / 2; \ - NAME##_3 (out, in, mask, N); \ + NAME##_3 (out, in, mask, 11, N); \ for (int i = 0; i < N * 3; ++i) \ { \ - OUTTYPE if_true = in[i / 3]; \ + OUTTYPE if_true = (INTYPE) (in[i / 3] + 11); \ OUTTYPE if_false = i * 9 / 2; \ if (out[i] != (mask[i / 3] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c index cc614847e7e..d604bd77efe 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c @@ -1,19 +1,22 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ - MASKTYPE *__restrict cond, int n) \ + MASKTYPE *__restrict cond, INTYPE bias, int n) \ { \ for (int i = 0; i < n; ++i) \ - if (cond[i]) \ - { \ - dest[i * 4] = src[i]; \ - dest[i * 4 + 1] = src[i]; \ - dest[i * 4 + 2] = src[i]; \ - dest[i * 4 + 3] = src[i]; \ - } \ + { \ + INTYPE value = src[i] + bias; \ + if (cond[i]) \ + { \ + dest[i * 4] = value; \ + dest[i * 4 + 1] = value; \ + dest[i * 4 + 2] = value; \ + dest[i * 4 + 3] = value; \ + } \ + } \ } #define TEST2(NAME, OUTTYPE, INTYPE) \ @@ -33,6 +36,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) @@ -49,10 +53,10 @@ TEST (test) /* Mask | 8 16 32 64 -------+------------ In 8 | 2 2 2 2 - 16 | 2 1 1 1 + 16 | 2 1 1 1 x2 (for half float) 32 | 2 1 1 1 64 | 2 1 1 1. 
*/ -/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 23 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 28 } } */ /* Mask | 8 16 32 64 -------+------------ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c index f845818fa4d..cbac3da9db2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include "sve_mask_struct_store_3.c" #define N 100 -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ { \ @@ -17,21 +15,22 @@ volatile int x; { \ in[i] = i * 7 / 2; \ mask[i] = i % 5 <= i % 3; \ + asm volatile ("" ::: "memory"); \ } \ for (int i = 0; i < N * 4; ++i) \ out[i] = i * 9 / 2; \ - NAME##_4 (out, in, mask, N); \ + NAME##_4 (out, in, mask, 42, N); \ for (int i = 0; i < N * 4; ++i) \ { \ - OUTTYPE if_true = in[i / 4]; \ + OUTTYPE if_true = (INTYPE) (in[i / 4] + 42); \ OUTTYPE if_false = i * 9 / 2; \ if (out[i] != (mask[i / 4] ? 
if_true : if_false)) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c index ac2df82c539..9b4e75554f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c @@ -2,7 +2,7 @@ /* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ MASKTYPE *__restrict cond, int n) \ { \ @@ -32,6 +32,7 @@ TEST1 (NAME##_i16, unsigned short) \ TEST1 (NAME##_i32, int) \ TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f16_f16, _Float16, _Float16) \ TEST2 (NAME##_f32_f32, float, float) \ TEST2 (NAME##_f64_f64, double, double) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c index a4d705e38ba..a2e671de3d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int64_t v4di __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v32qi) -DO_OP (v16hi) -DO_OP (v8si) -DO_OP (v4di) +DO_OP (vnx16qi) +DO_OP 
(vnx8hi) +DO_OP (vnx4si) +DO_OP (vnx2di) /* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c index b7cc1dba087..fb4454a1426 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int64_t v4di __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v32qi) -DO_OP (v16hi) -DO_OP (v8si) -DO_OP (v4di) +DO_OP (vnx16qi) +DO_OP (vnx8hi) +DO_OP (vnx4si) +DO_OP (vnx2di) /* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c index a38375af017..756263253c0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c @@ -3,9 +3,9 @@ void sve_copy_rr (void) { - typedef int v8si __attribute__((vector_size(32))); - register v8si x asm ("z1"); - register v8si y asm ("z2"); + typedef int vnx4si __attribute__((vector_size(32))); + register vnx4si x asm ("z1"); + register vnx4si y asm ("z2"); asm volatile ("#foo" : "=w" (x)); y = x; asm volatile ("#foo" :: "w" 
(y)); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c index fc05837a920..38aab512376 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int64_t v4di __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); #define DO_OP(TYPE) \ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ @@ -23,10 +23,10 @@ void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ *x = dst; \ } -DO_OP (v32qi) -DO_OP (v16hi) -DO_OP (v8si) -DO_OP (v4di) +DO_OP (vnx16qi) +DO_OP (vnx8hi) +DO_OP (vnx4si) +DO_OP (vnx2di) /* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ /* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c index 8f50308ebd5..a87fdd2aed2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c @@ -1,36 +1,39 @@ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ -#define TEST(NAME, TYPE, ITYPE) \ +#include <stdint.h> + +#define TEST(NAME, TYPE) \ void \ - NAME##1 (TYPE *x, ITYPE n) \ + NAME##1 (TYPE *x, int n) \ { \ - for (ITYPE i = 0; i < n; ++i) \ + for (int i = 0; i < n; ++i) \ x[i] += 1; \ } \ TYPE NAME##_array[1024]; \ void \ NAME##2 (void) \ { \ - for (ITYPE i = 1; i < 200; ++i) \ + for (int i = 1; i < 200; ++i) \ NAME##_array[i] += 1; \ } -TEST (sc, signed char, unsigned char) -TEST (uc, unsigned 
char, unsigned char) -TEST (ss, signed short, unsigned short) -TEST (us, unsigned short, signed short) -TEST (si, signed int, signed int) -TEST (ui, unsigned int, unsigned int) -TEST (sl, signed long, unsigned long) -TEST (ul, unsigned long, signed long) -TEST (f, float, int) -TEST (d, double, long) +TEST (s8, int8_t) +TEST (u8, uint8_t) +TEST (s16, int16_t) +TEST (u16, uint16_t) +TEST (s32, int32_t) +TEST (u32, uint32_t) +TEST (s64, int64_t) +TEST (u64, uint64_t) +TEST (f16, _Float16) +TEST (f32, float) +TEST (f64, double) /* No scalar memory accesses. */ /* { dg-final { scan-assembler-not {[wx][0-9]*, \[} } } */ /* 2 for each NAME##1 test, one in the header and one in the main loop and 1 for each NAME##2 test, in the main loop only. */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 6 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 9 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 9 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c index a39f8241f46..23b1b2a51e5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c @@ -9,7 +9,7 @@ int x[N] __attribute__((aligned(32))); -void __attribute__((weak)) +void __attribute__((noinline, noclone)) foo (void) { unsigned int v = 0; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c index 1ebaeea2bb9..6ed98ec075c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 
-mtune=thunderx" { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_peel_ind_1.c" -volatile int y; - -int +int __attribute__ ((optimize (1))) main (void) { foo (); @@ -14,7 +12,7 @@ main (void) { if (x[i] != (i < START || i >= END ? 0 : (i - START) * 5)) __builtin_abort (); - y++; + asm volatile ("" ::: "memory"); } return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c index 9ef8c7f85e4..af1a5aaa0ec 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c @@ -9,7 +9,7 @@ int x[N] __attribute__((aligned(32))); -void __attribute__((weak)) +void __attribute__((noinline, noclone)) foo (void) { for (unsigned int i = START; i < END; ++i) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c index b3e56bbbb7c..5565c32a888 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_peel_ind_2.c" -volatile int y; - -int +int __attribute__ ((optimize (1))) main (void) { foo (); @@ -14,7 +12,7 @@ main (void) { if (x[i] != (i < START || i >= END ? 
0 : i)) __builtin_abort (); - y++; + asm volatile ("" ::: "memory"); } return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c index 97a29f18361..a2602e781a1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c @@ -9,7 +9,7 @@ int x[MAX_START][N] __attribute__((aligned(32))); -void __attribute__((weak)) +void __attribute__((noinline, noclone)) foo (int start) { for (int i = start; i < start + COUNT; ++i) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c index 9851c1cce64..ee8061a1163 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_peel_ind_3.c" -volatile int y; - -int +int __attribute__ ((optimize (1))) main (void) { for (int start = 0; start < MAX_START; ++start) @@ -16,7 +14,7 @@ main (void) { if (x[start][i] != (i < start || i >= start + COUNT ? 
0 : i)) __builtin_abort (); - y++; + asm volatile ("" ::: "memory"); } } return 0; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c index e5c55877341..6ab089522fb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c @@ -6,7 +6,7 @@ #define START 1 #define END 505 -void __attribute__((weak)) +void __attribute__((noinline, noclone)) foo (double *x) { double v = 10.0; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c index 60be4a038de..3764457ffcc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c @@ -1,17 +1,18 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -fno-vect-cost-model" } */ -/* { dg-options "-Ofast -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" { target aarch64_sve256_hw } } */ +/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx" } */ +/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_peel_ind_4.c" -volatile int y; - -int +int __attribute__ ((optimize (1))) main (void) { double x[END + 1]; for (int i = 0; i < END + 1; ++i) - x[i] = i; + { + x[i] = i; + asm volatile ("" ::: "memory"); + } foo (x); for (int i = 0; i < END + 1; ++i) { @@ -22,7 +23,7 @@ main (void) expected = 10 + (i - START) * 5; if (x[i] != expected) __builtin_abort (); - y++; + asm volatile ("" ::: "memory"); } return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C deleted file mode 100644 index 53e10bcea01..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C +++ /dev/null @@ -1,48 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-std=c++11
-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <math.h> - -#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3) - -#define DEF_REDUC_PLUS(TYPE)\ -TYPE reduc_plus_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\ -{\ - TYPE r = 0, q = 3;\ - for (int i = 0; i < NUM_ELEMS(TYPE); i++)\ - {\ - r += a[i];\ - q -= b[i];\ - }\ - return r * q;\ -}\ - -DEF_REDUC_PLUS (float) -DEF_REDUC_PLUS (double) - -#define DEF_REDUC_MAXMIN(TYPE,FUN)\ -TYPE reduc_##FUN (TYPE *__restrict__ a, TYPE *__restrict__ b)\ -{\ - TYPE r = a[0], q = b[0];\ - for (int i = 0; i < NUM_ELEMS(TYPE); i++)\ - {\ - r = FUN (a[i], r);\ - q = FUN (b[i], q);\ - }\ - return r * q;\ -}\ - -DEF_REDUC_MAXMIN (float, fmaxf) -DEF_REDUC_MAXMIN (double, fmax) -DEF_REDUC_MAXMIN (float, fminf) -DEF_REDUC_MAXMIN (double, fmin) - - -/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */ -/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */ - -/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */ - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c new file mode 100644 index 00000000000..eb3e7e656d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define NUM_ELEMS(TYPE) ((int)(5 * (256 / sizeof (TYPE)) + 3)) + +#define DEF_REDUC_PLUS(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + reduc_plus_##TYPE (TYPE *a, TYPE *b) \ + { \ + TYPE r = 0, q = 3; \ 
+ for (int i = 0; i < NUM_ELEMS(TYPE); i++) \ + { \ + r += a[i]; \ + q -= b[i]; \ + } \ + return r * q; \ + } + +#define TEST_ALL(T) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (DEF_REDUC_PLUS) + +/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C deleted file mode 100644 index 769d25165ea..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include "sve_reduc_strict_1.C" -#include <stdlib.h> -#include <stdio.h> -#include <math.h> - -#define DEF_INIT_VECTOR(TYPE)\ - TYPE a_##TYPE[NUM_ELEMS (TYPE)];\ - TYPE b_##TYPE[NUM_ELEMS (TYPE)];\ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ - {\ - a_##TYPE[i] = (i * 2) * (i & 1 ? 1 : -1);\ - b_##TYPE[i] = (i * 3) * (i & 1 ? 
1 : -1);\ - } - -#define TEST_REDUC_PLUS(RES,TYPE) (RES) += reduc_plus_##TYPE (a_##TYPE, b_##TYPE); -#define TEST_REDUC_MAX(RES,TYPE) (RES) += reduc_fmax (a_##TYPE, b_##TYPE); -#define TEST_REDUC_MAXF(RES,TYPE) (RES) += reduc_fmaxf (a_##TYPE, b_##TYPE); -#define TEST_REDUC_MIN(RES,TYPE) (RES) += reduc_fmin (a_##TYPE, b_##TYPE); -#define TEST_REDUC_MINF(RES,TYPE) (RES) += reduc_fminf (a_##TYPE, b_##TYPE); - -int main () -{ - double result = 0.0; - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_REDUC_PLUS (result, float) - TEST_REDUC_PLUS (result, double) - - TEST_REDUC_MINF (result, float) - TEST_REDUC_MIN (result, double) - - TEST_REDUC_MAXF (result, float) - TEST_REDUC_MAX (result, double) - - if (result != double (1356996)) - { - fprintf (stderr, "result = %1.16lf\n", result); - abort (); - } - - return 0; -} - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c new file mode 100644 index 00000000000..4c810d4a337 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.c @@ -0,0 +1,29 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_reduc_strict_1.c" + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + TYPE a[NUM_ELEMS (TYPE)]; \ + TYPE b[NUM_ELEMS (TYPE)]; \ + TYPE r = 0, q = 3; \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + { \ + a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \ + b[i] = (i * 0.3) * (i & 1 ? 
1 : -1); \ + r += a[i]; \ + q -= b[i]; \ + asm volatile ("" ::: "memory"); \ + } \ + TYPE res = reduc_plus_##TYPE (a, b); \ + if (res != r * q) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_ALL (TEST_REDUC_PLUS); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C deleted file mode 100644 index 542918abeb8..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C +++ /dev/null @@ -1,48 +0,0 @@ -/* { dg-do compile } */ -/* FIXME: With -O3 we don't generate reductions as the compiler unrolls the outer loop - and processes the rows in parallel, performing in order reductions on the inner loop. */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <math.h> - -#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3) - -/* TODO: Test with inner loop = n * NUM_ELEMS(TYPE). */ -#define DEF_REDUC_PLUS(TYPE)\ -void reduc_plus_##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\ -{\ - for (int i = 0; i < n; i++)\ - {\ - r[i] = 0;\ - for (int j = 0; j < NUM_ELEMS(TYPE); j++)\ - r[i] += a[i][j];\ - }\ -}\ - -DEF_REDUC_PLUS (float) -DEF_REDUC_PLUS (double) - -#define DEF_REDUC_MAXMIN(TYPE,FUN)\ -void reduc_##FUN (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\ -{\ - for (int i = 0; i < n; i++)\ - {\ - r[i] = a[i][0];\ - for (int j = 0; j < NUM_ELEMS(TYPE); j++)\ - r[i] = FUN (a[i][j], r[i]);\ - }\ -}\ - -DEF_REDUC_MAXMIN (float, fmaxf) -DEF_REDUC_MAXMIN (double, fmax) -DEF_REDUC_MAXMIN (float, fminf) -DEF_REDUC_MAXMIN (double, fmin) - -/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */ -/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */ - -/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */ -/* { dg-final 
{ scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */ - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c new file mode 100644 index 00000000000..672be8f793e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define NUM_ELEMS(TYPE) ((int) (5 * (256 / sizeof (TYPE)) + 3)) + +#define DEF_REDUC_PLUS(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = 0; \ + for (int j = 0; j < NUM_ELEMS(TYPE); j++) \ + r[i] += a[i][j]; \ + } \ +} + +#define TEST_ALL(T) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (DEF_REDUC_PLUS) + +/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C deleted file mode 100644 index 86a930c7d33..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C +++ /dev/null @@ -1,59 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include "sve_reduc_strict_2.C" -#include <stdlib.h> -#include <stdio.h> -#include <math.h> - -#define NROWS 5 - -#define DEF_INIT_VECTOR(TYPE)\ - TYPE 
mat_##TYPE[NROWS][NUM_ELEMS (TYPE)];\ - TYPE r_##TYPE[NROWS];\ - for (int i = 0; i < NROWS; i++)\ - for (int j = 0; j < NUM_ELEMS (TYPE); j++ )\ - mat_##TYPE[i][j] = i + (j * 2) * (j & 1 ? 1 : -1);\ - -#define TEST_REDUC_PLUS(TYPE) reduc_plus_##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_MAXF reduc_fmaxf (mat_float, r_float, NROWS); -#define TEST_REDUC_MAX reduc_fmax (mat_double, r_double, NROWS); -#define TEST_REDUC_MINF reduc_fminf (mat_float, r_float, NROWS); -#define TEST_REDUC_MIN reduc_fmin (mat_double, r_double, NROWS); - -#define SUM_VECTOR(RES, TYPE)\ - for (int i = 0; i < NROWS; i++)\ - (RES) += r_##TYPE[i]; - -#define SUM_FLOAT_RESULT(RES)\ - SUM_VECTOR (RES, float);\ - SUM_VECTOR (RES, double);\ - -int main () -{ - double resultF = 0.0; - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_REDUC_PLUS (float) - TEST_REDUC_PLUS (double) - SUM_FLOAT_RESULT (resultF); - - TEST_REDUC_MAXF - TEST_REDUC_MAX - SUM_FLOAT_RESULT (resultF); - - TEST_REDUC_MINF - TEST_REDUC_MIN - SUM_FLOAT_RESULT (resultF); - - if (resultF != double (2460)) - { - fprintf (stderr, "resultF = %1.16lf\n", resultF); - abort (); - } - - return 0; -} - - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c new file mode 100644 index 00000000000..4741e6acb14 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.c @@ -0,0 +1,31 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_reduc_strict_2.c" + +#define NROWS 5 + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + TYPE a[NROWS][NUM_ELEMS (TYPE)]; \ + TYPE r[NROWS]; \ + TYPE expected[NROWS] = {}; \ + for (int i = 0; i < NROWS; ++i) \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + { \ + a[i][j] = (i * 0.1 + j * 0.6) * (j & 1 ? 
1 : -1); \ + expected[i] += a[i][j]; \ + asm volatile ("" ::: "memory"); \ + } \ + reduc_plus_##TYPE (a, r, NROWS); \ + for (int i = 0; i < NROWS; ++i) \ + if (r[i] != expected[i]) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_ALL (TEST_REDUC_PLUS); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c index 338aa614b47..ebed8e697c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.c @@ -1,12 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */ double mat[100][4]; double mat2[100][8]; double mat3[100][12]; double mat4[100][3]; -double slp_reduc_plus (int n) +double +slp_reduc_plus (int n) { double tmp = 0.0; for (int i = 0; i < n; i++) @@ -19,7 +20,8 @@ double slp_reduc_plus (int n) return tmp; } -double slp_reduc_plus2 (int n) +double +slp_reduc_plus2 (int n) { double tmp = 0.0; for (int i = 0; i < n; i++) @@ -36,7 +38,8 @@ double slp_reduc_plus2 (int n) return tmp; } -double slp_reduc_plus3 (int n) +double +slp_reduc_plus3 (int n) { double tmp = 0.0; for (int i = 0; i < n; i++) @@ -57,7 +60,8 @@ double slp_reduc_plus3 (int n) return tmp; } -void slp_non_chained_reduc (int n, double * __restrict__ out) +void +slp_non_chained_reduc (int n, double * restrict out) { for (int i = 0; i < 3; i++) out[i] = 0; @@ -73,7 +77,8 @@ void slp_non_chained_reduc (int n, double * __restrict__ out) /* Strict FP reductions shouldn't be used for the outer loops, only the inner loops. 
*/ -float double_reduc1 (float (*__restrict__ i)[16]) +float +double_reduc1 (float (*restrict i)[16]) { float l = 0; @@ -83,7 +88,8 @@ float double_reduc1 (float (*__restrict__ i)[16]) return l; } -float double_reduc2 (float *__restrict__ i) +float +double_reduc2 (float *restrict i) { float l = 0; @@ -98,7 +104,8 @@ float double_reduc2 (float *__restrict__ i) return l; } -float double_reduc3 (float *__restrict__ i, float *__restrict__ j) +float +double_reduc3 (float *restrict i, float *restrict j) { float k = 0, l = 0; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c new file mode 100644 index 00000000000..7c4290a2dc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_rev_1.c @@ -0,0 +1,49 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); + +#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X) +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) +#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) +#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) + +#define INDEX_32 vnx16qi +#define INDEX_16 vnx8hi +#define INDEX_8 vnx4si +#define INDEX_4 vnx2di + +#define PERMUTE(TYPE, NUNITS) \ + TYPE permute_##TYPE (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (0, NUNITS) })); \ + } + +#define TEST_ALL(T) \ + T (vnx16qi, 32) \ + T (vnx8hi, 16) \ + T (vnx4si, 8) \ + T (vnx2di, 4) \ + T 
(vnx8hf, 16) \ + T (vnx4sf, 8) \ + T (vnx2df, 4) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c index 9307200fb05..709fd3b37b4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c @@ -3,7 +3,7 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) @@ -11,7 +11,7 @@ typedef int8_t v32qi __attribute__((vector_size (32))); #define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) #define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) -#define INDEX_32 v32qi +#define INDEX_32 vnx16qi #define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ @@ -22,9 +22,9 @@ typedef int8_t v32qi __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v32qi, 32, 2) \ - T (v32qi, 32, 4) \ - T (v32qi, 32, 8) + T (vnx16qi, 32, 2) \ + T (vnx16qi, 32, 4) \ + T (vnx16qi, 32, 8) TEST_ALL (PERMUTE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c index fb238373c4e..fe3533cf6db 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c @@ -3,8 +3,8 @@ #include <stdint.h> -typedef uint16_t v16hi __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef uint16_t vnx8hi __attribute__((vector_size 
(32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) @@ -12,7 +12,7 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) #define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) -#define INDEX_16 v16hi +#define INDEX_16 vnx8hi #define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ @@ -23,10 +23,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v16hi, 16, 2) \ - T (v16hi, 16, 4) \ - T (v16hf, 16, 2) \ - T (v16hf, 16, 4) + T (vnx8hi, 16, 2) \ + T (vnx8hi, 16, 4) \ + T (vnx8hf, 16, 2) \ + T (vnx8hf, 16, 4) TEST_ALL (PERMUTE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c index 4834e2c2b01..a6b95f52880 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c @@ -3,14 +3,14 @@ #include <stdint.h> -typedef uint32_t v8si __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); +typedef uint32_t vnx4si __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) #define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) -#define INDEX_8 v8si +#define INDEX_8 vnx4si #define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ @@ -21,8 +21,8 @@ typedef float v8sf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v8si, 8, 2) \ - T (v8sf, 8, 2) + T (vnx4si, 8, 2) \ + T (vnx4sf, 8, 2) TEST_ALL (PERMUTE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c index 2270be2bd29..43a7e831cae 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c @@ -1,109 +1,31 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE)\ -void scatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - dst[i * STRIDE] = src[i];\ -} - -#define SCATTER_STORE2(OBJTYPE,STRIDETYPE)\ -void scatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - dst[i * stride] = src[i];\ -} - -#define SCATTER_STORE3(OBJTYPE,STRIDETYPE)\ -void scatter_store3s5##OBJTYPE##STRIDETYPE\ - (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\ - OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\ - STRIDETYPE count)\ -{\ - const STRIDETYPE STRIDE = 5;\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - dst[0 + (i * STRIDE)] = s1[i];\ - dst[4 + (i * STRIDE)] = s5[i];\ - dst[1 + (i * STRIDE)] = s2[i];\ - dst[2 + (i * STRIDE)] = s3[i];\ - dst[3 + (i * STRIDE)] = s4[i];\ - }\ -} - -#define SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE)\ -void scatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - *dst = *src;\ - dst += STRIDE;\ - src += 1;\ - }\ -} - -#define SCATTER_STORE5(OBJTYPE,STRIDETYPE)\ -void scatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ - OBJTYPE * restrict src,\ - STRIDETYPE stride,\ - STRIDETYPE count)\ -{\ - for (STRIDETYPE i=0; i<count; i++)\ - {\ - *dst = *src;\ - dst += stride;\ - src += 1;\ - }\ -} - -SCATTER_STORE1 (double, long, 5) -SCATTER_STORE1 (double, long, 8) -SCATTER_STORE1 (double, long, 21) -SCATTER_STORE1 (double, long, 1009) - -SCATTER_STORE1 (float, 
int, 5) -SCATTER_STORE1 (float, int, 8) -SCATTER_STORE1 (float, int, 21) -SCATTER_STORE1 (float, int, 1009) - -SCATTER_STORE2 (double, long) -SCATTER_STORE2 (float, int) - -SCATTER_STORE3 (double, long) -SCATTER_STORE3 (float, int) - -SCATTER_STORE4 (double, long, 5) -/* NOTE: We can't vectorize SCATTER_STORE4 (float, int, 5) because we can't - prove that the offsets used for the gather load won't overflow. */ - -SCATTER_STORE5 (double, long) -SCATTER_STORE5 (float, int) - -/* Widened forms. */ -SCATTER_STORE1 (double, int, 5) -SCATTER_STORE1 (double, int, 8) -SCATTER_STORE1 (double, short, 5) -SCATTER_STORE1 (double, short, 8) - -SCATTER_STORE1 (float, short, 5) -SCATTER_STORE1 (float, short, 8) - -SCATTER_STORE2 (double, int) -SCATTER_STORE2 (float, short) - -SCATTER_STORE4 (double, int, 5) -SCATTER_STORE4 (float, short, 5) - -SCATTER_STORE5 (double, int) - -/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[indices[i]] = src[i] + 1; \ + } + +#define TEST_ALL(T) \ + T (int32_t, 32) \ + T (uint32_t, 32) \ + T (float, 32) \ + T (int64_t, 64) \ + T (uint64_t, 64) \ + T (double, 64) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c deleted file mode 100644 index 4d8cddc510f..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c +++ /dev/null @@ -1,155 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <unistd.h> - -extern void abort (void); -extern void *memset(void *, int, size_t); - -#include "sve_scatter_store_1.c" - -#define NUM_SRC_ELEMS 13 -#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE) - -#define TEST_SCATTER_STORE_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - src[i] = i;\ - FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - if (dst[i * STRIDE] != i)\ - abort ();\ -} - -#define TEST_SCATTER_STORE_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ -{\ - OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ - __attribute__((aligned (32)));\ - memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ - OBJTYPE *src = &real_src[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - src[i] = i;\ - FUN##OBJTYPE##STRIDETYPE (dst, src, STRIDE, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - if (dst[i * STRIDE] != i)\ - abort ();\ -} - -#define TEST_SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE) \ - 
TEST_SCATTER_STORE_COMMON1 (scatter_store1, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_SCATTER_STORE2(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_SCATTER_STORE_COMMON2 (scatter_store2, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_SCATTER_STORE3(OBJTYPE,STRIDETYPE)\ -{\ - OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\ - __attribute__((aligned (32)));\ - OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\ - __attribute__((aligned (32)));\ - memset (real_src1, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_src2, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_src3, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_src4, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_src5, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ - memset (real_dst, 0, (1 + NUM_DST_ELEMS (5)) * sizeof (OBJTYPE));\ - OBJTYPE *src1 = &real_src1[1];\ - OBJTYPE *src2 = &real_src2[1];\ - OBJTYPE *src3 = &real_src3[1];\ - OBJTYPE *src4 = &real_src4[1];\ - OBJTYPE *src5 = &real_src5[1];\ - OBJTYPE *dst = &real_dst[1];\ - for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ - {\ - STRIDETYPE base = i * 5;\ - src1[i] = base;\ - src2[i] = base + 1;\ - src3[i] = base + 2;\ - src4[i] = base + 3;\ - src5[i] = base + 4;\ - }\ - scatter_store3s5##OBJTYPE##STRIDETYPE \ - (dst, src1, src2, src3, src4, src5, NUM_SRC_ELEMS); \ - for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\ - if (dst[i] != i)\ - abort ();\ -} - -#define TEST_SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_SCATTER_STORE_COMMON1 (scatter_store4, OBJTYPE, STRIDETYPE, STRIDE) - -#define TEST_SCATTER_STORE5(OBJTYPE,STRIDETYPE,STRIDE) \ - TEST_SCATTER_STORE_COMMON2 (scatter_store5, OBJTYPE, STRIDETYPE, STRIDE) - -int __attribute__ 
((optimize (1))) -main () -{ - TEST_SCATTER_STORE1 (double, long, 5); - TEST_SCATTER_STORE1 (double, long, 8); - TEST_SCATTER_STORE1 (double, long, 21); - - TEST_SCATTER_STORE1 (float, int, 5); - TEST_SCATTER_STORE1 (float, int, 8); - TEST_SCATTER_STORE1 (float, int, 21); - - TEST_SCATTER_STORE2 (double, long, 5); - TEST_SCATTER_STORE2 (double, long, 8); - TEST_SCATTER_STORE2 (double, long, 21); - - TEST_SCATTER_STORE2 (float, int, 5); - TEST_SCATTER_STORE2 (float, int, 8); - TEST_SCATTER_STORE2 (float, int, 21); - - TEST_SCATTER_STORE3 (double, long); - TEST_SCATTER_STORE3 (float, int); - - TEST_SCATTER_STORE4 (double, long, 5); - - TEST_SCATTER_STORE5 (double, long, 5); - TEST_SCATTER_STORE5 (float, int, 5); - - /* Widened forms. */ - TEST_SCATTER_STORE1 (double, int, 5) - TEST_SCATTER_STORE1 (double, int, 8) - TEST_SCATTER_STORE1 (double, short, 5) - TEST_SCATTER_STORE1 (double, short, 8) - - TEST_SCATTER_STORE1 (float, short, 5) - TEST_SCATTER_STORE1 (float, short, 8) - - TEST_SCATTER_STORE2 (double, int, 5); - TEST_SCATTER_STORE2 (double, int, 8); - TEST_SCATTER_STORE2 (double, int, 21); - - TEST_SCATTER_STORE2 (float, short, 5); - TEST_SCATTER_STORE2 (float, short, 8); - TEST_SCATTER_STORE2 (float, short, 21); - - TEST_SCATTER_STORE4 (double, int, 5); - TEST_SCATTER_STORE4 (float, short, 5); - - TEST_SCATTER_STORE5 (double, int, 5); - - return 0; -} - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c new file mode 100644 index 00000000000..dcc96f07fc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_2.c @@ -0,0 +1,10 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_scatter_store_1.c" + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times 
{\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c new file mode 100644 index 00000000000..d09c4015aa0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_3.c @@ -0,0 +1,32 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +/* Invoked 18 times for each data size. */ +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \ + } + +#define TEST_ALL(T) \ + T (int32_t, 32) \ + T (uint32_t, 32) \ + T (float, 32) \ + T (int64_t, 64) \ + T (uint64_t, 64) \ + T (double, 64) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c new file mode 100644 index 00000000000..c4f2dae481b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_4.c @@ -0,0 +1,10 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_scatter_store_3.c" + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c 
b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c new file mode 100644 index 00000000000..7b117bc0b2b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_5.c @@ -0,0 +1,23 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +/* Invoked 18 times for each data size. */ +#define TEST_LOOP(DATA_TYPE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \ + int n) \ + { \ + for (int i = 9; i < n; ++i) \ + *dest[i] = src[i] + 1; \ + } + +#define TEST_ALL(T) \ + T (int64_t) \ + T (uint64_t) \ + T (double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c new file mode 100644 index 00000000000..14e68267c9f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_6.c @@ -0,0 +1,36 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fwrapv -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX16 int16_t +#define INDEX32 int32_t +#endif + +/* Invoked 18 times for each data size. 
*/ +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, INDEX##BITS mask, int n) \ + { \ + for (int i = 9; i < n; ++i) \ + dest[(INDEX##BITS) (indices[i] | mask)] = src[i] + 1; \ + } + +#define TEST_ALL(T) \ + T (int32_t, 16) \ + T (uint32_t, 16) \ + T (float, 16) \ + T (int64_t, 32) \ + T (uint64_t, 32) \ + T (double, 32) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c new file mode 100644 index 00000000000..89e2d305c29 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_7.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX16 uint16_t +#define INDEX32 uint32_t + +#include "sve_scatter_store_6.c" + +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */ +/* Either extension type is OK here. 
*/ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c index 460359e4be3..23327a7a152 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \ { \ for (int i = 0; i < n; ++i) \ @@ -23,15 +23,18 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) TEST_ALL (VEC_PERM) -/* We should use one DUP for each of the 8-, 16- and 32-bit types. - We should use two DUPs for each of the three 64-bit types. */ +/* We should use one DUP for each of the 8-, 16- and 32-bit types, + although we currently use LD1RW for _Float16. We should use two + DUPs for each of the three 64-bit types. */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */ /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ @@ -39,17 +42,18 @@ TEST_ALL (VEC_PERM) /* The loop should be fully-masked. 
*/ /* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ /* { dg-final { scan-assembler-not {\tldr} } } */ -/* { dg-final { scan-assembler-not {\tstr} } } */ +/* { dg-final { scan-assembler-times {\tstr} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */ /* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c index 7dd3640966a..0c10d934259 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ for (int i = 0; i < n; ++i) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c index c1aeaf9b06e..08cad65ab63 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c @@ -47,7 +47,7 @@ } \ } -int +int __attribute__ ((optimize 
(1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c index 3db5769deed..ce6060a52df 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE1, TYPE2) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \ TYPE2 *restrict b, int n) \ { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c index c302ef6fb76..aa49952b470 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c @@ -38,7 +38,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c index 9afe7e59ef2..77bf7b72454 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c @@ -6,7 +6,7 @@ #define N1 (19 * 2) #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b) \ { \ for (int i = 0; i < N1; ++i) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c index 8c854d4207c..e926de602bd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c @@ -46,7 +46,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c index f3ecbd7adbc..ff3046e127d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c @@ -5,7 +5,7 @@ #include <stdint.h> 
#define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, int n) \ { \ TYPE res = 0; \ @@ -26,6 +26,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) @@ -35,7 +36,7 @@ TEST_ALL (VEC_PERM) /* ??? We don't treat the uint loops as SLP. */ /* The loop should be fully-masked. */ /* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */ @@ -43,7 +44,7 @@ TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ @@ -51,6 +52,7 @@ TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ /* { dg-final { scan-assembler-not 
{\tfadd\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c index 282f1ae2310..2824073cf14 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_slp_13.c" @@ -21,7 +21,7 @@ __builtin_abort (); \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c index 6c1b38277ec..3971acde999 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[2] = { 3, 11 }; \ for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_slp_##TYPE (a, b[0], b[1], N / 2); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -20,7 +23,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c index 3e71596021f..ba3506ab4e4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, int n) \ { \ for (int i = 0; i < n; ++i) \ @@ -23,13 +23,14 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */ -/* { dg-final { scan-assembler-times 
{\tld1rw\tz[0-9]+\.s, } 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */ /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ @@ -39,14 +40,14 @@ TEST_ALL (VEC_PERM) /* The loop should be fully-masked. */ /* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ /* { dg-final { scan-assembler-not {\tldr} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c index 7d4d5e8ca3d..c0411459b94 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[2] = { 10, 17 }; \ for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_slp_##TYPE (a, N / 2); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -20,7 +23,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c index 3ac0eebf422..326630f421f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, int n) \ { \ for (int i = 0; i < n; ++i) \ @@ -25,6 +25,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) @@ -33,7 +34,7 @@ TEST_ALL (VEC_PERM) /* 1 for each 8-bit type. */ /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ /* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double. */ -/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 12 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 13 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ @@ -49,14 +50,14 @@ TEST_ALL (VEC_PERM) and stores each. 
*/ /* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 12 } } */ /* { dg-final { scan-assembler-not {\tldr} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c index 7306355b873..de33f41c2c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[4] = { 41, 25, 31, 62 }; \ for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_slp_##TYPE (a, N / 4); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -20,7 +23,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c index b0890fd934b..32c14ebe4bf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ 
vec_slp_##TYPE (TYPE *restrict a, int n) \ { \ for (int i = 0; i < n; ++i) \ @@ -29,6 +29,7 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) @@ -36,7 +37,7 @@ TEST_ALL (VEC_PERM) /* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type and 8 for double. */ -/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 26 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 28 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ @@ -55,21 +56,21 @@ TEST_ALL (VEC_PERM) ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away) ZIP1 ZIP2 ZIP1 ZIP2 ZIP1 ZIP2 ZIP1 ZIP2. */ -/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 35 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */ /* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */ /* The loop should be fully-masked. The 32-bit types need two loads and stores each and the 64-bit types need four. 
*/ /* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 6 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 6 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 12 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 12 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 12 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 24 } } */ /* { dg-final { scan-assembler-not {\tldr} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c index 2eb2a5ff07e..e0fe656859d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \ for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_slp_##TYPE (a, N / 8); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -20,7 +23,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c index 0f8cf624e20..e0bacb0cad8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, 
noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ TYPE x0 = b[0]; \ @@ -27,6 +27,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) @@ -37,9 +38,9 @@ TEST_ALL (VEC_PERM) /* ??? At present we don't treat the int8_t and int16_t loops as reductions. */ /* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-not {\tld2b\t} } } */ @@ -52,12 +53,14 @@ TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ -/* Should be 4, if we used reductions for int8_t and int16_t. */ +/* Should be 4 and 6 respectively, if we used reductions for int8_t and + int16_t. 
*/ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c index 476b40cb0e9..bb5421700da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c @@ -5,25 +5,30 @@ #define N (141 * 2) -#define HARNESS(TYPE) \ - { \ - TYPE a[N], b[2] = { 40, 22 }; \ - for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ - vec_slp_##TYPE (a, b, N / 2); \ - TYPE x0 = 40; \ - TYPE x1 = 22; \ - for (unsigned int i = 0; i < N; i += 2) \ - { \ - x0 += a[i]; \ - x1 += a[i + 1]; \ - asm volatile (""); \ - } \ - if (x0 != b[0] || x1 != b[1]) \ - __builtin_abort (); \ +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 40, 22 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, b, N / 2); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + for (unsigned int i = 0; i < N; i += 2) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + asm volatile ("" ::: "memory"); \ + } \ + /* _Float16 isn't precise enough for this. 
*/ \ + if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \ + && (x0 != b[0] || x1 != b[1])) \ + __builtin_abort (); \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c index 8cdceb57dc6..b3bdb04e2ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ TYPE x0 = b[0]; \ @@ -30,6 +30,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c index a9ca327c907..e2ad116f91d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c @@ -5,27 +5,32 @@ #define N (77 * 3) -#define HARNESS(TYPE) \ - { \ - TYPE a[N], b[3] = { 40, 22, 75 }; \ - for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ - vec_slp_##TYPE (a, b, N / 3); \ - TYPE x0 = 40; \ - TYPE x1 = 22; \ - TYPE x2 = 75; \ - for (unsigned int i = 0; i < N; i += 3) \ - { \ - x0 += a[i]; \ - x1 += a[i + 1]; \ - x2 += a[i + 2]; \ - asm volatile (""); \ - } \ - if (x0 != b[0] || x1 != b[1] || x2 != b[2]) \ - __builtin_abort (); \ +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[3] = { 40, 22, 75 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, b, N / 3); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + TYPE x2 = 75; \ + for (unsigned int i = 0; i < N; i += 3) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + x2 += a[i + 2]; \ + asm volatile ("" ::: "memory"); \ + } \ + /* _Float16 isn't precise enough for 
this. */ \ + if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \ + && (x0 != b[0] || x1 != b[1] || x2 != b[2])) \ + __builtin_abort (); \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c index 4dc9fafcdde..372c7575cdb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ TYPE x0 = b[0]; \ @@ -33,6 +33,7 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ T (uint32_t) \ T (int64_t) \ T (uint64_t) \ + T (_Float16) \ T (float) \ T (double) @@ -45,9 +46,9 @@ TEST_ALL (VEC_PERM) /* ??? At present we don't treat the int8_t and int16_t loops as reductions. */ /* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ /* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */ /* { dg-final { scan-assembler-not {\tld4b\t} } } */ @@ -60,12 +61,14 @@ TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ /* { dg-final { 
scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ -/* Should be 4, if we used reductions for int8_t and int16_t. */ +/* Should be 4 and 6 respectively, if we used reductions for int8_t and + int16_t. */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c index 12446972fde..5a8bf99bc5b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c @@ -5,29 +5,34 @@ #define N (54 * 4) -#define HARNESS(TYPE) \ - { \ - TYPE a[N], b[4] = { 40, 22, 75, 19 }; \ - for (unsigned int i = 0; i < N; ++i) \ - a[i] = i * 2 + i % 5; \ - vec_slp_##TYPE (a, b, N / 4); \ - TYPE x0 = 40; \ - TYPE x1 = 22; \ - TYPE x2 = 75; \ - TYPE x3 = 19; \ - for (unsigned int i = 0; i < N; i += 4) \ - { \ - x0 += a[i]; \ - x1 += a[i + 1]; \ - x2 += a[i + 2]; \ - x3 += a[i + 3]; \ - asm volatile (""); \ - } \ - if (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3]) \ - __builtin_abort (); \ +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[4] = { 40, 22, 75, 19 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, b, N / 4); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + TYPE x2 = 75; \ + TYPE x3 = 19; \ + for (unsigned int i = 0; i < N; i += 4) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + x2 += a[i + 2]; \ + x3 += a[i + 3]; \ + asm volatile ("" ::: "memory"); \ + } \ + /* _Float16 isn't precise enough for this. 
*/ \ + if ((TYPE) 0x1000 + 1 != (TYPE) 0x1000 \ + && (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3])) \ + __builtin_abort (); \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c index caae4528d82..d57457fbef0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ for (int i = 0; i < n; ++i) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c index 2717ca62de1..09a6d648c52 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c @@ -37,7 +37,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c index af06270b6f2..65e1cb8f044 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE1, TYPE2) \ -void __attribute__ ((weak)) \ +void __attribute__ ((noinline, noclone)) \ vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \ TYPE2 *restrict b, int n) \ { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c index 0bde3b6ea03..3e69a48580b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c @@ -32,7 +32,7 @@ } \ } -int +int __attribute__ ((noinline, noclone)) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c 
index 25f3047444e..db35711a193 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c @@ -21,6 +21,6 @@ FPTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit)\ SPEC_FP_LOOP (uint32_t, uint32_t, double) -/* { dg-final { scan-tree-dump-times "Not vectorized: Multiple ncopies not supported" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "not vectorized: ncopies is greater than 1" 1 "vect" } } */ /* { dg-final { scan-assembler-not "brka\tp\[0-9\]*.b, p\[0-9\]*\/z, p\[0-9\]*.b" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c index 4765b22f014..1b71687a257 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c @@ -41,4 +41,4 @@ SPEC_LOOP (uint64_t, uint16_t) SPEC_LOOP (uint64_t, uint32_t) /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "Speculative loop mask load/stores not supported" "vect" } } */ +/* { dg-final { scan-tree-dump "speculative mask loads not supported" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c index 3e7367cd9fa..1a48f7b6080 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c @@ -3,50 +3,50 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); void 
sve_store_64_z_lsl (uint64_t *a, unsigned long i) { - asm volatile ("" : "=w" (*(v4di *) &a[i])); + asm volatile ("" : "=w" (*(vnx2di *) &a[i])); } void sve_store_64_s_lsl (int64_t *a, signed long i) { - asm volatile ("" : "=w" (*(v4di *) &a[i])); + asm volatile ("" : "=w" (*(vnx2di *) &a[i])); } void sve_store_32_z_lsl (uint32_t *a, unsigned long i) { - asm volatile ("" : "=w" (*(v8si *) &a[i])); + asm volatile ("" : "=w" (*(vnx4si *) &a[i])); } void sve_store_32_s_lsl (int32_t *a, signed long i) { - asm volatile ("" : "=w" (*(v8si *) &a[i])); + asm volatile ("" : "=w" (*(vnx4si *) &a[i])); } void sve_store_16_z_lsl (uint16_t *a, unsigned long i) { - asm volatile ("" : "=w" (*(v16hi *) &a[i])); + asm volatile ("" : "=w" (*(vnx8hi *) &a[i])); } void sve_store_16_s_lsl (int16_t *a, signed long i) { - asm volatile ("" : "=w" (*(v16hi *) &a[i])); + asm volatile ("" : "=w" (*(vnx8hi *) &a[i])); } /* ??? The other argument order leads to a redundant move. */ void sve_store_8_z (unsigned long i, uint8_t *a) { - asm volatile ("" : "=w" (*(v32qi *) &a[i])); + asm volatile ("" : "=w" (*(vnx16qi *) &a[i])); } void sve_store_8_s (signed long i, int8_t *a) { - asm volatile ("" : "=w" (*(v32qi *) &a[i])); + asm volatile ("" : "=w" (*(vnx16qi *) &a[i])); } /* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c new file mode 100644 index 00000000000..b940ba9d4de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_1.c @@ -0,0 +1,40 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX8 +#define INDEX8 int8_t +#define INDEX16 int16_t +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##BITS (DATA_TYPE 
*restrict dest, \ + DATA_TYPE *restrict src, \ + INDEX##BITS stride, INDEX##BITS n) \ + { \ + for (INDEX##BITS i = 0; i < n; ++i) \ + dest[i] += src[i * stride]; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 8) \ + T (DATA_TYPE, 16) \ + T (DATA_TYPE, 32) \ + T (DATA_TYPE, 64) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c new file mode 100644 index 00000000000..a834989091d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_2.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX8 uint8_t +#define INDEX16 uint16_t +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_strided_load_1.c" + +/* 8 and 16 bits are signed because the multiplication promotes to int. + Using uxtw for all 9 would be OK. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ +/* The 32-bit loop needs to honor the defined overflow in uint32_t, + so we vectorize the offset calculation. This means that the + 64-bit version needs two copies. 
*/ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c new file mode 100644 index 00000000000..8f0bfdd4bb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_3.c @@ -0,0 +1,32 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, \ + OTHER_TYPE *restrict other, \ + OTHER_TYPE mask, \ + int stride, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] = src[i * stride] + (OTHER_TYPE) (other[i] | mask); \ + } + +#define TEST_ALL(T) \ + T (int32_t, int16_t) \ + T (uint32_t, int16_t) \ + T (float, int16_t) \ + T (int64_t, int32_t) \ + T (uint64_t, int32_t) \ + T (double, int32_t) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c new file mode 100644 index 00000000000..b7dc12fb3c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_4.c @@ -0,0 +1,33 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include 
<stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * SCALE]; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c new file mode 100644 index 00000000000..6cbcc963595 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_5.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, long n) \ + { \ + for (long i = 0; i < n; ++i) \ + dest[i] += src[i * SCALE]; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, 
z[0-9]+.s, uxtw\]\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c new file mode 100644 index 00000000000..aaf743b3d82 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_6.c @@ -0,0 +1,7 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */ + +#include "sve_strided_load_5.c" + +/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c new file mode 100644 index 00000000000..ddf6667e8c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_7.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src) \ + { \ + for (long i = 0; i < 1000; ++i) \ + dest[i] += src[i * SCALE]; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */ +/* { dg-final { 
scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c new file mode 100644 index 00000000000..788aeb08df2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_load_8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void +foo (double *x, int m) +{ + for (int i = 0; i < 256; ++i) + x[i * m] += x[i * m]; +} + +/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c new file mode 100644 index 00000000000..4f84b3fdec5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_1.c @@ -0,0 +1,40 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX8 +#define INDEX8 int8_t +#define INDEX16 int16_t +#define INDEX32 int32_t +#define INDEX64 int64_t +#endif + +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, \ + INDEX##BITS stride, INDEX##BITS n) \ + { \ + for (INDEX##BITS i = 0; i < n; ++i) \ + dest[i * stride] = src[i] + 1; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 8) \ + T (DATA_TYPE, 16) \ + T (DATA_TYPE, 32) \ + T (DATA_TYPE, 64) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) 
\ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c new file mode 100644 index 00000000000..1a8df604ead --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_2.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define INDEX8 uint8_t +#define INDEX16 uint16_t +#define INDEX32 uint32_t +#define INDEX64 uint64_t + +#include "sve_strided_store_1.c" + +/* 8 and 16 bits are signed because the multiplication promotes to int. + Using uxtw for all 9 would be OK. */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ +/* The 32-bit loop needs to honor the defined overflow in uint32_t, + so we vectorize the offset calculation. This means that the + 64-bit version needs two copies. 
*/ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c new file mode 100644 index 00000000000..19454565f97 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_3.c @@ -0,0 +1,33 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, OTHER_TYPE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##BITS (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, \ + OTHER_TYPE *restrict other, \ + OTHER_TYPE mask, \ + int stride, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i * stride] = src[i] + (OTHER_TYPE) (other[i] | mask); \ + } + +#define TEST_ALL(T) \ + T (int32_t, int16_t) \ + T (uint32_t, int16_t) \ + T (float, int16_t) \ + T (int64_t, int32_t) \ + T (uint64_t, int32_t) \ + T (double, int32_t) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c new file mode 100644 index 00000000000..23f1329c69b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_4.c @@ -0,0 +1,33 @@ +/* { 
dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i * SCALE] = src[i] + 1; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c new file mode 100644 index 00000000000..68f2a539c27 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_5.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src, long n) \ + { \ + for (long i = 0; i < n; ++i) \ + dest[i * SCALE] = src[i] + 1; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, 
double) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c new file mode 100644 index 00000000000..da124b7348b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_6.c @@ -0,0 +1,7 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable --save-temps" } */ + +#include "sve_strided_store_5.c" + +/* { dg-final { scan-assembler-not {\[x[0-9]+, z[0-9]+\.s} } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c new file mode 100644 index 00000000000..a76ac359f01 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_strided_store_7.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define TEST_LOOP(DATA_TYPE, NAME, SCALE) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE##_##NAME (DATA_TYPE *restrict dest, \ + DATA_TYPE *restrict src) \ + { \ + for (long i = 0; i < 1000; ++i) \ + dest[i * SCALE] = src[i] + 1; \ + } + +#define TEST_TYPE(T, DATA_TYPE) \ + T (DATA_TYPE, 5, 5) \ + T (DATA_TYPE, 7, 7) \ + T (DATA_TYPE, 11, 11) \ + T (DATA_TYPE, 200, 200) \ + T (DATA_TYPE, m100, -100) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, uint32_t) \ + TEST_TYPE (T, float) \ + TEST_TYPE (T, int64_t) \ + TEST_TYPE (T, uint64_t) \ + TEST_TYPE (T, double) + +TEST_ALL (TEST_LOOP) + +/* { 
dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c index bb23f9886c6..e9ac4790c7b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c @@ -1,32 +1,35 @@ -/* { dg-do compile } */ -/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef struct { v32qi a[2]; } v64qi; +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[2]; } vnx32qi; -typedef short v16hi __attribute__((vector_size(32))); -typedef struct { v16hi a[2]; } v32hi; +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[2]; } vnx16hi; -typedef int v8si __attribute__((vector_size(32))); -typedef struct { v8si a[2]; } v16si; +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[2]; } vnx8si; -typedef long v4di __attribute__((vector_size(32))); -typedef struct { v4di a[2]; } v8di; +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[2]; } vnx4di; -typedef float v8sf __attribute__((vector_size(32))); -typedef struct { v8sf a[2]; } v16sf; +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef struct { vnx8hf a[2]; } vnx16hf; -typedef double v4df __attribute__((vector_size(32))); -typedef struct { v4df a[2]; } v8df; +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[2]; } vnx8sf; + +typedef double vnx2df 
__attribute__((vector_size(32))); +typedef struct { vnx2df a[2]; } vnx4df; #define TEST_TYPE(TYPE, REG1, REG2) \ void \ f1_##TYPE (TYPE *a) \ { \ register TYPE x asm (#REG1) = a[0]; \ - asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ register TYPE y asm (#REG2) = x; \ - asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ : "=&w" (x) : "0" (x), "w" (y)); \ a[1] = x; \ } \ @@ -54,63 +57,73 @@ typedef struct { v4df a[2]; } v8df; asm volatile ("# %0" :: "w" (x)); \ } -TEST_TYPE (v64qi, z0, z2) -TEST_TYPE (v32hi, z5, z7) -TEST_TYPE (v16si, z10, z12) -TEST_TYPE (v8di, z15, z17) -TEST_TYPE (v16sf, z20, z23) -TEST_TYPE (v8df, z28, z30) +TEST_TYPE (vnx32qi, z0, z2) +TEST_TYPE (vnx16hi, z5, z7) +TEST_TYPE (vnx8si, z10, z12) +TEST_TYPE (vnx4di, z15, z17) +TEST_TYPE (vnx16hf, z18, z20) +TEST_TYPE (vnx8sf, z21, z23) +TEST_TYPE (vnx4df, z28, z30) /* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v64qi 1 z0\n} } } */ +/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */ /* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */ -/* { dg-final { scan-assembler { test v64qi 2 z0, z0, z2\n} } } */ +/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1h\tz5.h, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v32hi 1 z5\n} } } */ +/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */ /* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */ 
/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */ -/* { dg-final { scan-assembler { test v32hi 2 z5, z5, z7\n} } } */ +/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz5.h, p[0-7], \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz10.s, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz11.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v16si 1 z10\n} } } */ +/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */ /* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */ -/* { dg-final { scan-assembler { test v16si 2 z10, z10, z12\n} } } */ +/* { dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz10.s, p[0-7], \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz11.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz15.d, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v8di 1 z15\n} } } */ +/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */ /* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */ -/* { dg-final { scan-assembler { test v8di 2 z15, z15, z17\n} } } */ +/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz15.d, p[0-7], \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ -/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test 
v16sf 1 z20\n} } } */ -/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */ -/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ -/* { dg-final { scan-assembler { test v16sf 2 z20, z20, z23\n} } } */ -/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx16hf 1 z18\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz20.d, z18.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz21.d, z19.d\n} } } */ +/* { dg-final { scan-assembler { test vnx16hf 2 z18, z18, z20\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx8sf 1 z21\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz23.d, z21.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz24.d, z22.d\n} } } */ +/* { dg-final { scan-assembler { test vnx8sf 2 z21, z21, z23\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz29.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v8df 1 z28\n} } } */ +/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */ /* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */ -/* { dg-final { 
scan-assembler { test v8df 2 z28, z28, z30\n} } } */ +/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz29.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c index d36aa75483a..faf503c35e1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c @@ -1,51 +1,55 @@ -/* { dg-do compile } */ -/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef struct { v32qi a[3]; } v96qi; +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[3]; } vnx48qi; -typedef short v16hi __attribute__((vector_size(32))); -typedef struct { v16hi a[3]; } v48hi; +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[3]; } vnx24hi; -typedef int v8si __attribute__((vector_size(32))); -typedef struct { v8si a[3]; } v24si; +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[3]; } vnx12si; -typedef long v4di __attribute__((vector_size(32))); -typedef struct { v4di a[3]; } v12di; +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[3]; } vnx6di; -typedef float v8sf __attribute__((vector_size(32))); -typedef struct { v8sf a[3]; } v24sf; +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef struct { vnx8hf a[3]; } vnx24hf; -typedef double v4df __attribute__((vector_size(32))); -typedef struct { v4df a[3]; } v12df; +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[3]; } vnx12sf; + +typedef double vnx2df 
__attribute__((vector_size(32))); +typedef struct { vnx2df a[3]; } vnx6df; #define TEST_TYPE(TYPE, REG1, REG2) \ void \ f_##TYPE (TYPE *a) \ { \ register TYPE x asm (#REG1) = a[0]; \ - asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ register TYPE y asm (#REG2) = x; \ - asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ : "=&w" (x) : "0" (x), "w" (y)); \ a[1] = x; \ } -TEST_TYPE (v96qi, z0, z3) -TEST_TYPE (v48hi, z6, z2) -TEST_TYPE (v24si, z12, z15) -TEST_TYPE (v12di, z16, z13) -TEST_TYPE (v24sf, z20, z23) -TEST_TYPE (v12df, z26, z29) +TEST_TYPE (vnx48qi, z0, z3) +TEST_TYPE (vnx24hi, z6, z2) +TEST_TYPE (vnx12si, z12, z15) +TEST_TYPE (vnx6di, z16, z13) +TEST_TYPE (vnx24hf, z18, z1) +TEST_TYPE (vnx12sf, z20, z23) +TEST_TYPE (vnx6df, z26, z29) /* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v96qi 1 z0\n} } } */ +/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */ /* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */ -/* { dg-final { scan-assembler { test v96qi 2 z0, z0, z3\n} } } */ +/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #5, mul vl\]\n} } } */ @@ -53,11 +57,11 @@ TEST_TYPE (v12df, z26, z29) /* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ 
/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v48hi 1 z6\n} } } */ +/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */ /* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ -/* { dg-final { scan-assembler { test v48hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ @@ -65,11 +69,11 @@ TEST_TYPE (v12df, z26, z29) /* { dg-final { scan-assembler {\tld1w\tz12.s, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v24si 1 z12\n} } } */ +/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */ /* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */ -/* { dg-final { scan-assembler { test v24si 2 z12, z12, z15\n} } } */ +/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ @@ -77,23 +81,35 @@ TEST_TYPE (v12df, z26, z29) /* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz17.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { 
scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v12di 1 z16\n} } } */ +/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */ /* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */ -/* { dg-final { scan-assembler { test v12di 2 z16, z16, z13\n} } } */ +/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx24hf 1 z18\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */ +/* { dg-final { scan-assembler { test vnx24hf 2 z18, z18, z1\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ + /* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v24sf 1 z20\n} } } */ +/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */ /* { dg-final { 
scan-assembler {\tmov\tz23.d, z20.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */ -/* { dg-final { scan-assembler { test v24sf 2 z20, z20, z23\n} } } */ +/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ @@ -101,11 +117,11 @@ TEST_TYPE (v12df, z26, z29) /* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v12df 1 z26\n} } } */ +/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */ /* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */ -/* { dg-final { scan-assembler { test v12df 2 z26, z26, z29\n} } } */ +/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz27.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c index d97d6973359..101a33701a5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c @@ -1,53 +1,57 @@ -/* { dg-do compile } */ -/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ +/* { dg-do assemble } */ +/* { dg-options 
"-O -march=armv8-a+sve -msve-vector-bits=256 -mbig-endian --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef struct { v32qi a[4]; } v128qi; +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[4]; } vnx64qi; -typedef short v16hi __attribute__((vector_size(32))); -typedef struct { v16hi a[4]; } v64hi; +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[4]; } vnx32hi; -typedef int v8si __attribute__((vector_size(32))); -typedef struct { v8si a[4]; } v32si; +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[4]; } vnx16si; -typedef long v4di __attribute__((vector_size(32))); -typedef struct { v4di a[4]; } v16di; +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[4]; } vnx8di; -typedef float v8sf __attribute__((vector_size(32))); -typedef struct { v8sf a[4]; } v32sf; +typedef _Float16 vnx8hf __attribute__((vector_size(32))); +typedef struct { vnx8hf a[4]; } vnx32hf; -typedef double v4df __attribute__((vector_size(32))); -typedef struct { v4df a[4]; } v16df; +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[4]; } vnx16sf; + +typedef double vnx2df __attribute__((vector_size(32))); +typedef struct { vnx2df a[4]; } vnx8df; #define TEST_TYPE(TYPE, REG1, REG2) \ void \ f_##TYPE (TYPE *a) \ { \ register TYPE x asm (#REG1) = a[0]; \ - asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ register TYPE y asm (#REG2) = x; \ - asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ : "=&w" (x) : "0" (x), "w" (y)); \ a[1] = x; \ } -TEST_TYPE (v128qi, z0, z4) -TEST_TYPE (v64hi, z6, z2) -TEST_TYPE (v32si, z12, z16) -TEST_TYPE (v16di, z17, z13) -TEST_TYPE (v32sf, z20, z16) -TEST_TYPE (v16df, z24, z28) +TEST_TYPE (vnx64qi, z0, z4) +TEST_TYPE (vnx32hi, z6, z2) +TEST_TYPE (vnx16si, z12, z16) +TEST_TYPE 
(vnx8di, z17, z13) +TEST_TYPE (vnx32hf, z18, z1) +TEST_TYPE (vnx16sf, z20, z16) +TEST_TYPE (vnx8df, z24, z28) /* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1b\tz3.b, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v128qi 1 z0\n} } } */ +/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */ /* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */ -/* { dg-final { scan-assembler { test v128qi 2 z0, z0, z4\n} } } */ +/* { dg-final { scan-assembler { test vnx64qi 2 z0, z0, z4\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #6, mul vl\]\n} } } */ @@ -57,12 +61,12 @@ TEST_TYPE (v16df, z24, z28) /* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1h\tz9.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v64hi 1 z6\n} } } */ +/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */ /* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */ -/* { dg-final { scan-assembler { test v64hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */ /* { 
dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #6, mul vl\]\n} } } */ @@ -72,12 +76,12 @@ TEST_TYPE (v16df, z24, z28) /* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz15.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v32si 1 z12\n} } } */ +/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */ /* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */ -/* { dg-final { scan-assembler { test v32si 2 z12, z12, z16\n} } } */ +/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #6, mul vl\]\n} } } */ @@ -87,27 +91,42 @@ TEST_TYPE (v16df, z24, z28) /* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz19.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz20.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v16di 1 z17\n} } } */ +/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */ /* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ -/* 
{ dg-final { scan-assembler { test v16di 2 z17, z17, z13\n} } } */ +/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz19.d, p[0-7], \[x0, #6, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz20.d, p[0-7], \[x0, #7, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz18.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz19.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz20.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz21.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx32hf 1 z18\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz1.d, z18.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z19.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z21.d\n} } } */ +/* { dg-final { scan-assembler { test vnx32hf 2 z18, z18, z1\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz18.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz19.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz20.h, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz21.h, p[0-7], \[x0, #7, mul vl\]\n} } } */ + /* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1w\tz23.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v32sf 1 z20\n} } } */ +/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */ /* { 
dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */ -/* { dg-final { scan-assembler { test v32sf 2 z20, z20, z16\n} } } */ +/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #6, mul vl\]\n} } } */ @@ -117,12 +136,12 @@ TEST_TYPE (v16df, z24, z28) /* { dg-final { scan-assembler {\tld1d\tz25.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ -/* { dg-final { scan-assembler { test v16df 1 z24\n} } } */ +/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */ /* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */ /* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */ -/* { dg-final { scan-assembler { test v16df 2 z24, z24, z28\n} } } */ +/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz24.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz25.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ /* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #6, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c new file mode 100644 index 00000000000..40ec0481e84 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_4.c @@ -0,0 +1,116 @@ +/* { dg-do assemble } 
*/ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */ + +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[2]; } vnx32qi; + +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[2]; } vnx16hi; + +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[2]; } vnx8si; + +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[2]; } vnx4di; + +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[2]; } vnx8sf; + +typedef double vnx2df __attribute__((vector_size(32))); +typedef struct { vnx2df a[2]; } vnx4df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f1_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } \ + /* This must compile, but we don't care how. 
*/ \ + void \ + f2_##TYPE (TYPE *a) \ + { \ + TYPE x = a[0]; \ + x.a[0][3] = 1; \ + x.a[1][2] = 12; \ + asm volatile ("# %0" :: "w" (x)); \ + } \ + void \ + f3_##TYPE (TYPE *a, int i) \ + { \ + TYPE x = a[0]; \ + x.a[0][i] = 1; \ + asm volatile ("# %0" :: "w" (x)); \ + } \ + void \ + f4_##TYPE (TYPE *a, int i, int j) \ + { \ + TYPE x = a[0]; \ + x.a[i][j] = 44; \ + asm volatile ("# %0" :: "w" (x)); \ + } + +TEST_TYPE (vnx32qi, z0, z2) +TEST_TYPE (vnx16hi, z5, z7) +TEST_TYPE (vnx8si, z10, z12) +TEST_TYPE (vnx4di, z15, z17) +TEST_TYPE (vnx8sf, z20, z23) +TEST_TYPE (vnx4df, z28, z30) + +/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx32qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */ +/* { dg-final { scan-assembler { test vnx32qi 2 z0, z0, z2\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz5, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz6, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx16hi 1 z5\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */ +/* { dg-final { scan-assembler { test vnx16hi 2 z5, z5, z7\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz5, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz10, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz11, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx8si 1 z10\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz12.d, z10.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */ +/* { 
dg-final { scan-assembler { test vnx8si 2 z10, z10, z12\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz10, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz11, \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz15, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz16, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx4di 1 z15\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */ +/* { dg-final { scan-assembler { test vnx4di 2 z15, z15, z17\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx8sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ +/* { dg-final { scan-assembler { test vnx8sf 2 z20, z20, z23\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz28, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz29, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx4df 1 z28\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */ +/* { dg-final { scan-assembler { test vnx4df 2 z28, z28, z30\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz29, \[x0, #3, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c new file mode 100644 index 
00000000000..ee04c3e0f23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_5.c @@ -0,0 +1,111 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */ + +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[3]; } vnx48qi; + +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[3]; } vnx24hi; + +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[3]; } vnx12si; + +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[3]; } vnx6di; + +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[3]; } vnx12sf; + +typedef double vnx2df __attribute__((vector_size(32))); +typedef struct { vnx2df a[3]; } vnx6df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } + +TEST_TYPE (vnx48qi, z0, z3) +TEST_TYPE (vnx24hi, z6, z2) +TEST_TYPE (vnx12si, z12, z15) +TEST_TYPE (vnx6di, z16, z13) +TEST_TYPE (vnx12sf, z20, z23) +TEST_TYPE (vnx6df, z26, z29) + +/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx48qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, z2.d\n} } } */ +/* { dg-final { scan-assembler { test vnx48qi 2 z0, z0, z3\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #4, mul 
vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx24hi 1 z6\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ +/* { dg-final { scan-assembler { test vnx24hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx12si 1 z12\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */ +/* { dg-final { scan-assembler { test vnx12si 2 z12, z12, z15\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz16, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz17, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx6di 1 z16\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */ +/* { 
dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */ +/* { dg-final { scan-assembler { test vnx6di 2 z16, z16, z13\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz16, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx12sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */ +/* { dg-final { scan-assembler { test vnx12sf 2 z20, z20, z23\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz26, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz28, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx6df 1 z26\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz29.d, z26.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */ +/* { dg-final { scan-assembler { test vnx6df 2 z26, z26, z29\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz28, \[x0, #5, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c new file 
mode 100644 index 00000000000..8bfd9f6d1af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_6.c @@ -0,0 +1,129 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 -mlittle-endian --save-temps" } */ + +typedef char vnx16qi __attribute__((vector_size(32))); +typedef struct { vnx16qi a[4]; } vnx64qi; + +typedef short vnx8hi __attribute__((vector_size(32))); +typedef struct { vnx8hi a[4]; } vnx32hi; + +typedef int vnx4si __attribute__((vector_size(32))); +typedef struct { vnx4si a[4]; } vnx16si; + +typedef long vnx2di __attribute__((vector_size(32))); +typedef struct { vnx2di a[4]; } vnx8di; + +typedef float vnx4sf __attribute__((vector_size(32))); +typedef struct { vnx4sf a[4]; } vnx16sf; + +typedef double vnx2df __attribute__((vector_size(32))); +typedef struct { vnx2df a[4]; } vnx8df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %S0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %S0, %S1, %S2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } + +TEST_TYPE (vnx64qi, z0, z4) +TEST_TYPE (vnx32hi, z6, z2) +TEST_TYPE (vnx16si, z12, z16) +TEST_TYPE (vnx8di, z17, z13) +TEST_TYPE (vnx16sf, z20, z16) +TEST_TYPE (vnx8df, z24, z28) + +/* { dg-final { scan-assembler {\tldr\tz0, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz1, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz2, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz3, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx64qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */ +/* { dg-final { scan-assembler { test vnx64qi 2 z0, 
z0, z4\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz0, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz1, \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz2, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz3, \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz6, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz7, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz8, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz9, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx32hi 1 z6\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */ +/* { dg-final { scan-assembler { test vnx32hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz6, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz7, \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz8, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz9, \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz12, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz13, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz14, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz15, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx16si 1 z12\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */ +/* { dg-final { scan-assembler { test vnx16si 2 z12, z12, z16\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz12, \[x0, #4, mul vl\]\n} 
} } */ +/* { dg-final { scan-assembler {\tstr\tz13, \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz14, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz15, \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz17, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz18, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz19, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz20, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx8di 1 z17\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ +/* { dg-final { scan-assembler { test vnx8di 2 z17, z17, z13\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz17, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz18, \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz19, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz20, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz21, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz22, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz23, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx16sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */ +/* { dg-final { scan-assembler { test vnx16sf 2 z20, z20, z16\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz20, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz21, \[x0, #5, 
mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz22, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz23, \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tldr\tz24, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz25, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz26, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tldr\tz27, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test vnx8df 1 z24\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */ +/* { dg-final { scan-assembler { test vnx8df 2 z24, z24, z28\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz24, \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz25, \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz26, \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tstr\tz27, \[x0, #7, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c index 6d7b5fecbce..3405bd76eb1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #ifndef TYPE #define TYPE unsigned char diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c index 7ae718ada2c..dff9e963e06 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options 
"-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned long #define ITYPE long diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c index 5ab3ff68bda..611cbbda078 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned long #define ITYPE long diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c index 6771938131b..80e69463e18 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c @@ -1,13 +1,13 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE float -#define ITYPE int +#define TYPE _Float16 +#define ITYPE short #include "sve_struct_vect_7.c" -/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, 
p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c index f9c129801fc..bfab53d9b6b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c @@ -1,6 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE float -#define ITYPE int +#define TYPE _Float16 +#define ITYPE short #include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c index 37c11b3b29a..47279e0a80e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c @@ -1,13 +1,13 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE double -#define ITYPE long +#define TYPE float +#define ITYPE int #include "sve_struct_vect_7.c" -/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, 
p[0-7]/z, \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c index c7ed3fe2806..74007a938b7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c @@ -1,6 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define TYPE double -#define ITYPE long +#define TYPE float +#define ITYPE int #include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c index 3e3b9d733e4..5ebf5d8ee38 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c @@ -1,66 +1,13 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ - -#define TYPE unsigned char -#define NAME(X) qi_##X -#include 
"sve_struct_vect_1.c" -#undef NAME -#undef TYPE - -#define TYPE unsigned short -#define NAME(X) hi_##X -#include "sve_struct_vect_1.c" -#undef NAME -#undef TYPE - -#define TYPE unsigned int -#define NAME(X) si_##X -#include "sve_struct_vect_1.c" -#undef NAME -#undef TYPE - -#define TYPE unsigned long -#define NAME(X) di_##X -#include "sve_struct_vect_1.c" -#undef NAME -#undef TYPE - -#define TYPE float -#define NAME(X) sf_##X -#include "sve_struct_vect_1.c" -#undef NAME -#undef TYPE +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE double -#define NAME(X) df_##X -#include "sve_struct_vect_1.c" -#undef NAME -#undef TYPE - -/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ - -/* { dg-final { scan-assembler-times 
{\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ - -/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +#define ITYPE long +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c new file 
mode 100644 index 00000000000..6fb5329913b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define TYPE double +#define ITYPE long +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c index c3e81f500e0..46126e841dc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c @@ -1,7 +1,47 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -#include "sve_struct_vect_13.c" +#define TYPE unsigned char +#define NAME(X) qi_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned short +#define NAME(X) hi_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned int +#define NAME(X) si_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned long +#define NAME(X) di_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE _Float16 +#define NAME(X) hf_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE float +#define NAME(X) sf_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE double +#define NAME(X) df_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE /* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ @@ -10,12 +50,12 @@ /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } 
*/ /* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c index 635910e11a0..c1ccf7f09bb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c @@ -1,7 +1,7 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024" } */ +/* { dg-do 
assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512 --save-temps" } */ -#include "sve_struct_vect_13.c" +#include "sve_struct_vect_14.c" /* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ @@ -10,12 +10,12 @@ /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ /* { dg-final { 
scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c index 9afc0708fb1..61985f98974 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c @@ -1,7 +1,7 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024 --save-temps" } */ -#include "sve_struct_vect_13.c" +#include "sve_struct_vect_14.c" /* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ @@ -10,12 +10,12 @@ /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c index 80c99961791..6dd2878c552 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c @@ -1,47 +1,32 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048 --save-temps" } */ -#define N 2000 +#include "sve_struct_vect_14.c" -#define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ - NAME (TYPE *restrict dest, TYPE *restrict src) \ - { \ - for (int i = 0; i < N; ++i) \ - dest[i] += src[i * 2]; \ - } +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -#define 
TEST(NAME) \ - TEST_LOOP (NAME##_i8, signed char) \ - TEST_LOOP (NAME##_i16, unsigned short) \ - TEST_LOOP (NAME##_f32, float) \ - TEST_LOOP (NAME##_f64, double) +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -TEST (test) +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ -/* Check the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ - -/* Check the scalar tail. */ -/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ -/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ -/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ -/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ -/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ -/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ -/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ -/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ - -/* The only branches should be in the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c deleted file mode 100644 index 970c6de6f08..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c +++ /dev/null @@ -1,32 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ - -#include "sve_struct_vect_17.c" - -volatile int x; - -#undef TEST_LOOP -#define TEST_LOOP(NAME, TYPE) \ - { \ - TYPE out[N]; \ - TYPE in[N * 2]; \ - for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 2; ++i) \ - in[i] = i * 9 / 2; \ - NAME (out, in); \ - for (int i = 0; i < N; ++i) \ - { \ - TYPE expected = i * 7 / 2 + in[i * 2]; \ - if (out[i] != expected) \ - __builtin_abort (); \ - x += 1; \ - } \ - } - -int -main (void) -{ - TEST (test); - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c index 90e0b53c7df..fd0ce83ffac 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c @@ -4,11 +4,11 @@ #define N 2000 #define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) 
\ NAME (TYPE *restrict dest, TYPE *restrict src) \ { \ for (int i = 0; i < N; ++i) \ - dest[i] += src[i * 4]; \ + dest[i] += src[i * 3]; \ } #define TEST(NAME) \ @@ -21,16 +21,16 @@ TEST (test) /* Check the vectorized loop. */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ /* Check the scalar tail. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c index f7db5aea413..6467fa23b83 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c @@ -3,28 +3,32 @@ #include "sve_struct_vect_18.c" -volatile int x; - #undef TEST_LOOP #define TEST_LOOP(NAME, TYPE) \ { \ TYPE out[N]; \ - TYPE in[N * 4]; \ + TYPE in[N * 3]; \ for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 4; ++i) \ - in[i] = i * 9 / 2; \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 3; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ NAME (out, in); \ for (int i = 0; i < N; ++i) \ { \ - TYPE expected = i * 7 / 2 + in[i * 4]; \ + TYPE expected = i * 7 / 2 + in[i * 3]; \ if (out[i] != expected) \ __builtin_abort (); \ - x += 1; \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c index 3430459a2f3..2a099d05d65 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c @@ -2,11 +2,11 @@ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ + void __attribute__ ((noinline, noclone)) \ NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ { \ for (int i = 0; i < n; ++i) \ - dest[i] += src[i * 2]; \ + dest[i] += src[i * 3]; \ } #define TEST(NAME) \ @@ -19,16 +19,16 @@ TEST (test) /* Check the vectorized loop. 
*/ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ /* Check the scalar tail. */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c index 94593cef684..f9bf095d3a5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c @@ -3,37 +3,41 @@ #include "sve_struct_vect_19.c" -volatile int x; - #define N 1000 #undef TEST_LOOP -#define TEST_LOOP(NAME, TYPE) \ - { \ - TYPE out[N]; \ - TYPE in[N * 2]; \ - int counts[] = { 0, 1, N - 1 }; \ - for (int j = 0; j < 3; ++j) \ - { \ - int count = counts[j]; \ - for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 2; ++i) \ - in[i] = i * 9 / 2; \ - NAME (out, in, count); \ - for (int i = 0; i < N; ++i) \ - { \ - TYPE expected = i * 7 / 2; \ - if (i < count) \ - expected += in[i * 2]; \ - if (out[i] != expected) \ - __builtin_abort (); \ - x += 1; \ - } \ - } \ +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 3]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; 
++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 3; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 3]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c index 1f99c676586..a94142f2c9e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c @@ -1,10 +1,8 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #include "sve_struct_vect_1.c" -extern void abort() __attribute__((noreturn)); - TYPE a[N], b[N], c[N], d[N], e[N * 4]; void __attribute__ ((noinline, noclone)) @@ -19,10 +17,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step) { for (int i = 0; i < n; ++i) if (array[i] != (TYPE) (base + step * i)) - abort (); + __builtin_abort (); } -int +int __attribute__ ((optimize (1))) main (void) { init_array (e, 2 * N, 11, 5); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c index 8e5a96361f6..0d51808552e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned 
short #include "sve_struct_vect_1.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c index aad0e104379..3a2907f4ad9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c @@ -1,12 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +#define N 2000 + #define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ - NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + void __attribute__ ((noinline, noclone)) \ + NAME (TYPE *restrict dest, TYPE *restrict src) \ { \ - for (int i = 0; i < n; ++i) \ - dest[i] += src[i * 4]; \ + for (int i = 0; i < N; ++i) \ + dest[i] += src[i * 2]; \ } #define TEST(NAME) \ @@ -19,16 +21,16 @@ TEST (test) /* Check the vectorized loop. */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ /* Check the scalar tail. 
*/ @@ -41,7 +43,5 @@ TEST (test) /* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ -/* Each function should have three branches: one directly to the exit - (n <= 0), one to the single scalar epilogue iteration (n == 1), - and one branch-back for the vectorized loop. */ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ +/* The only branches should be in the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c index 3be63364455..de563c98c1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c @@ -3,37 +3,32 @@ #include "sve_struct_vect_20.c" -volatile int x; - -#define N 1000 - #undef TEST_LOOP #define TEST_LOOP(NAME, TYPE) \ { \ TYPE out[N]; \ - TYPE in[N * 4]; \ - int counts[] = { 0, 1, N - 1 }; \ - for (int j = 0; j < 3; ++j) \ + TYPE in[N * 2]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 2; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + NAME (out, in); \ + for (int i = 0; i < N; ++i) \ { \ - int count = counts[j]; \ - for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 4; ++i) \ - in[i] = i * 9 / 2; \ - NAME (out, in, count); \ - for (int i = 0; i < N; ++i) \ - { \ - TYPE expected = i * 7 / 2; \ - if (i < count) \ - expected += in[i * 4]; \ - if (out[i] != expected) \ - __builtin_abort (); \ - x += 1; \ - } \ + TYPE expected = i * 7 / 2 + in[i * 2]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c index ac3a7dd2383..bb29747b0c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c @@ -1,14 +1,12 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -#define N 2000 - #define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ - NAME (TYPE *restrict dest, TYPE *restrict src) \ + void __attribute__ ((noinline, noclone)) \ + NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ { \ - for (int i = 0; i < N; ++i) \ - dest[i] += src[i * 3]; \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * 2]; \ } #define TEST(NAME) \ @@ -21,16 +19,16 @@ TEST (test) /* Check the vectorized loop. */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ /* Check the scalar tail. */ @@ -43,5 +41,7 @@ TEST (test) /* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ -/* The only branches should be in the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ +/* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c index 94d72d1835a..6f9a4e3dc32 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c @@ -3,28 +3,41 @@ #include "sve_struct_vect_21.c" -volatile int x; +#define N 1000 #undef TEST_LOOP -#define TEST_LOOP(NAME, TYPE) \ - { \ - TYPE out[N]; \ - TYPE in[N * 3]; \ - for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 3; ++i) \ - in[i] = i * 9 / 2; \ - NAME (out, in); \ - for (int i = 0; i < N; ++i) \ - { \ - TYPE expected = i * 7 / 2 + in[i * 3]; \ - if (out[i] != expected) \ - __builtin_abort (); \ - x += 1; \ - } \ +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 2]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; ++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 2; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 2]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c index c17766c7d23..8ee25a0e279 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c @@ -1,12 +1,14 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +#define N 2000 + #define TEST_LOOP(NAME, TYPE) \ - void __attribute__((weak)) \ - NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + void __attribute__ ((noinline, noclone)) \ + NAME (TYPE *restrict dest, TYPE *restrict src) \ { \ - for (int i = 0; i < n; ++i) \ - dest[i] += src[i * 3]; \ + for (int i = 0; i < N; ++i) \ + dest[i] += src[i * 4]; \ } #define TEST(NAME) \ @@ -19,16 +21,16 @@ TEST (test) /* Check the vectorized loop. */ /* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ /* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ /* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ /* Check the scalar tail. */ @@ -41,7 +43,5 @@ TEST (test) /* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ -/* Each function should have three branches: one directly to the exit - (n <= 0), one to the single scalar epilogue iteration (n == 1), - and one branch-back for the vectorized loop. 
*/ -/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ +/* The only branches should be in the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c index 550364b16d1..1c3699292c0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c @@ -3,37 +3,32 @@ #include "sve_struct_vect_22.c" -volatile int x; - -#define N 1000 - #undef TEST_LOOP #define TEST_LOOP(NAME, TYPE) \ { \ TYPE out[N]; \ - TYPE in[N * 3]; \ - int counts[] = { 0, 1, N - 1 }; \ - for (int j = 0; j < 3; ++j) \ + TYPE in[N * 4]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 4; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + NAME (out, in); \ + for (int i = 0; i < N; ++i) \ { \ - int count = counts[j]; \ - for (int i = 0; i < N; ++i) \ - out[i] = i * 7 / 2; \ - for (int i = 0; i < N * 3; ++i) \ - in[i] = i * 9 / 2; \ - NAME (out, in, count); \ - for (int i = 0; i < N; ++i) \ - { \ - TYPE expected = i * 7 / 2; \ - if (i < count) \ - expected += in[i * 3]; \ - if (out[i] != expected) \ - __builtin_abort (); \ - x += 1; \ - } \ + TYPE expected = i * 7 / 2 + in[i * 4]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST (test); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c new file mode 100644 index 00000000000..7542e531624 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__ ((noinline, 
noclone)) \ + NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * 4]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c new file mode 100644 index 00000000000..83f13dd46cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_23_run.c @@ -0,0 +1,45 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_23.c" + +#define N 1000 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 4]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; ++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + for (int i = 0; i < N * 4; ++i) \ + { \ + in[i] = i * 9 / 2; \ + asm volatile ("" ::: "memory"); \ + } \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 4]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c index 6229b78b72e..0da23e144af 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned short #include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c index 3a29ae16701..b1e37e536e5 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned int #include "sve_struct_vect_1.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c index 7703dc6c043..74a5bd3233b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned int #include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c index 0c526365829..af20d763bdd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned long #include "sve_struct_vect_1.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c index 4ea2cff9dd0..a8aedd188c8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned 
long #include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c index efc1c9d2e2c..4b1f8cd341a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE float #include "sve_struct_vect_1.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c index f0d56e87dcc..22ba35ff702 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE float #include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c index ff445c1fbb0..981c9d31950 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE double #include "sve_struct_vect_1.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c index b0b685c0789..dbcbae8259f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target 
aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE double #include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c index 9712f89d171..8067d5ed169 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #ifndef TYPE #define TYPE unsigned char diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c index 5cfb7559a5c..8cc1993e997 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c @@ -1,12 +1,10 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #include "sve_struct_vect_7.c" #define N 93 -extern void abort() __attribute__((noreturn)); - TYPE a[N], b[N], c[N], d[N], e[N * 4]; void __attribute__ ((noinline, noclone)) @@ -21,10 +19,10 @@ check_array (TYPE *array, int n, TYPE base, TYPE step) { for (int i = 0; i < n; ++i) if (array[i] != (TYPE) (base + step * i)) - abort (); + __builtin_abort (); } -int +int __attribute__ ((optimize (1))) main (void) { init_array (e, 2 * N, 11, 5); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c index 57cb93de5d9..e807179a6a5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c @@ -1,5 +1,5 @@ -/* { dg-do compile } 
*/ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned short #define ITYPE short diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c index 59005a2f05b..954043fa874 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned short #define ITYPE short diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c index d897d556d05..a167a7b2caf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned int #define ITYPE int diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c index ab694b4a971..4b94d383fec 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ #define TYPE unsigned int #define ITYPE int diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c index 0c7b887d232..754b188a206 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c @@ -7,13 +7,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define MASK_2(X, Y) X, Y + X #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) @@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) #define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) -#define INDEX_4 v4di -#define INDEX_8 v8si -#define INDEX_16 v16hi -#define INDEX_32 v32qi +#define INDEX_4 vnx2di +#define INDEX_8 vnx4si +#define INDEX_16 vnx8hi +#define INDEX_32 vnx16qi #define PERMUTE(TYPE, NUNITS) \ TYPE permute_##TYPE (TYPE values1, TYPE values2) \ @@ -35,13 +35,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v4di, 4) \ - T (v8si, 8) \ - T (v16hi, 16) \ - T (v32qi, 32) \ - T (v4df, 4) \ - T (v8sf, 8) \ - T (v16hf, 16) + T (vnx2di, 4) \ + T (vnx4si, 8) \ + T (vnx8hi, 16) \ + T (vnx16qi, 32) \ + T (vnx2df, 4) \ + T (vnx4sf, 8) \ + T (vnx8hf, 16) TEST_ALL (PERMUTE) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c index 
4d345cf81e9..303276a64cf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c @@ -3,12 +3,12 @@ #include <stdint.h> -#define UNPACK(TYPED, TYPES) \ -void __attribute__ ((noinline, noclone)) \ -unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ -{ \ - for (int i = 0; i < size; i++) \ - d[i] = s[i] + 1; \ +#define UNPACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, TYPES mask, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = (TYPES) (s[i] | mask); \ } #define TEST_ALL(T) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c index d183408d124..da29eda1434 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c @@ -14,9 +14,9 @@ arrays[i] = (i - 10) * 3; \ asm volatile ("" ::: "memory"); \ } \ - unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + unpack_##TYPED##_##TYPES (arrayd, arrays, 7, ARRAY_SIZE); \ for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) | 7)) \ __builtin_abort (); \ } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c index fa8de963264..8c927873340 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c @@ -8,7 +8,7 @@ void __attribute__ ((noinline, noclone)) \ unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ { \ for (int i = 0; i < size; i++) \ - d[i] = s[i] + 1; \ + d[i] = (TYPES) (s[i] + 1); \ } #define TEST_ALL(T) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c index 
3fa66220f17..d2df061e88d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c @@ -16,7 +16,7 @@ } \ unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + if (arrayd[i] != (TYPED) (TYPES) (((i - 10) * 3) + 1)) \ __builtin_abort (); \ } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c index aaa4fdccbf0..36048f03f99 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define UZP1(TYPE, MASK) \ TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \ @@ -18,18 +18,18 @@ TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \ } -UZP1 (v4di, ((v4di) { 0, 2, 4, 6 })); -UZP1 (v8si, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); -UZP1 (v16hi, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30 })); -UZP1 (v32qi, ((v32qi) { 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 50, 52, 54, 56, 58, 
60, 62 })); -UZP1 (v4df, ((v4di) { 0, 2, 4, 6 })); -UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); -UZP1 (v16hf, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30 })); +UZP1 (vnx2di, ((vnx2di) { 0, 2, 4, 6 })); +UZP1 (vnx4si, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 })); +UZP1 (vnx8hi, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 })); +UZP1 (vnx16qi, ((vnx16qi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62 })); +UZP1 (vnx2df, ((vnx2di) { 0, 2, 4, 6 })); +UZP1 (vnx4sf, ((vnx4si) { 0, 2, 4, 6, 8, 10, 12, 14 })); +UZP1 (vnx8hf, ((vnx8hi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 })); /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c index d35dad0ffca..622f0d10f5f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c @@ -16,48 +16,48 @@ int main (void) { - TEST_UZP1 (v4di, - ((v4di) { 4, 6, 12, 36 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 })); - TEST_UZP1 (v8si, - ((v8si) { 3, 5, 7, 9, 33, 35, 37, 39 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); - TEST_UZP1 (v16hi, - ((v16hi) { 3, 5, 7, 9, 11, 13, 15, 17, - 33, 35, 37, 39, 41, 43, 45, 47 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 })); - TEST_UZP1 (v32qi, - ((v32qi) { 4, 6, 4, 6, 4, 6, 4, 6, - 4, 6, 4, 6, 4, 6, 4, 6, - 12, 36, 12, 36, 12, 36, 12, 36, - 12, 36, 12, 36, 12, 36, 12, 36 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 
6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 })); - TEST_UZP1 (v4df, - ((v4df) { 4.0, 6.0, 12.0, 36.0 }), - ((v4df) { 4.0, 5.0, 6.0, 7.0 }), - ((v4df) { 12.0, 24.0, 36.0, 48.0 })); - TEST_UZP1 (v8sf, - ((v8sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }), - ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), - ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); - TEST_UZP1 (v16hf, - ((v16hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, - 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + TEST_UZP1 (vnx2di, + ((vnx2di) { 4, 6, 12, 36 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 })); + TEST_UZP1 (vnx4si, + ((vnx4si) { 3, 5, 7, 9, 33, 35, 37, 39 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_UZP1 (vnx8hi, + ((vnx8hi) { 3, 5, 7, 9, 11, 13, 15, 17, + 33, 35, 37, 39, 41, 43, 45, 47 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_UZP1 (vnx16qi, + ((vnx16qi) { 4, 6, 4, 6, 4, 6, 4, 6, + 4, 6, 4, 6, 4, 6, 4, 6, + 12, 36, 12, 36, 12, 36, 12, 36, + 12, 36, 12, 36, 12, 36, 12, 36 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_UZP1 (vnx2df, + ((vnx2df) { 4.0, 6.0, 12.0, 36.0 }), + ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }), + ((vnx2df) { 12.0, 24.0, 36.0, 48.0 })); + TEST_UZP1 (vnx4sf, + ((vnx4sf) { 3.0, 5.0, 7.0, 9.0, 
33.0, 35.0, 37.0, 39.0 }), + ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), + ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); + TEST_UZP1 (vnx8hf, + ((vnx8hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, + 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c index 1bb84d80eb0..a9e4a63fb4d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define UZP2(TYPE, MASK) \ TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \ @@ -17,18 +17,18 @@ TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \ return __builtin_shuffle (values1, values2, MASK); \ } -UZP2 (v4di, ((v4di) { 1, 3, 5, 7 })); -UZP2 (v8si, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); -UZP2 (v16hi, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31 })); 
-UZP2 (v32qi, ((v32qi) { 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31, - 33, 35, 37, 39, 41, 43, 45, 47, - 49, 51, 53, 55, 57, 59, 61, 63 })); -UZP2 (v4df, ((v4di) { 1, 3, 5, 7 })); -UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); -UZP2 (v16hf, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31 })); +UZP2 (vnx2di, ((vnx2di) { 1, 3, 5, 7 })); +UZP2 (vnx4si, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 })); +UZP2 (vnx8hi, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 })); +UZP2 (vnx16qi, ((vnx16qi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63 })); +UZP2 (vnx2df, ((vnx2di) { 1, 3, 5, 7 })); +UZP2 (vnx4sf, ((vnx4si) { 1, 3, 5, 7, 9, 11, 13, 15 })); +UZP2 (vnx8hf, ((vnx8hi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 })); /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c index d7a241c1258..05d82fe08c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c @@ -16,48 +16,48 @@ int main (void) { - TEST_UZP2 (v4di, - ((v4di) { 5, 7, 24, 48 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 })); - TEST_UZP2 (v8si, - ((v8si) { 4, 6, 8, 10, 34, 36, 38, 40 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); - TEST_UZP2 (v16hi, - ((v16hi) { 4, 6, 8, 10, 12, 14, 16, 18, - 34, 36, 38, 40, 42, 44, 46, 48 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 })); - TEST_UZP2 (v32qi, - ((v32qi) { 5, 7, 5, 7, 5, 7, 5, 7, - 5, 7, 5, 7, 5, 7, 5, 7, - 24, 48, 24, 48, 24, 48, 24, 48, 
- 24, 48, 24, 48, 24, 48, 24, 48 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 })); - TEST_UZP2 (v4df, - ((v4df) { 5.0, 7.0, 24.0, 48.0 }), - ((v4df) { 4.0, 5.0, 6.0, 7.0 }), - ((v4df) { 12.0, 24.0, 36.0, 48.0 })); - TEST_UZP2 (v8sf, - ((v8sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }), - ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), - ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); - TEST_UZP2 (v16hf, - ((v16hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, - 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + TEST_UZP2 (vnx2di, + ((vnx2di) { 5, 7, 24, 48 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 })); + TEST_UZP2 (vnx4si, + ((vnx4si) { 4, 6, 8, 10, 34, 36, 38, 40 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_UZP2 (vnx8hi, + ((vnx8hi) { 4, 6, 8, 10, 12, 14, 16, 18, + 34, 36, 38, 40, 42, 44, 46, 48 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_UZP2 (vnx16qi, + ((vnx16qi) { 5, 7, 5, 7, 5, 7, 5, 7, + 5, 7, 5, 7, 5, 7, 5, 7, + 24, 48, 24, 48, 24, 48, 24, 48, + 24, 48, 24, 48, 24, 48, 24, 48 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_UZP2 (vnx2df, + ((vnx2df) { 5.0, 7.0, 
24.0, 48.0 }), + ((vnx2df) { 4.0, 5.0, 6.0, 7.0 }), + ((vnx2df) { 12.0, 24.0, 36.0, 48.0 })); + TEST_UZP2 (vnx4sf, + ((vnx4sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }), + ((vnx4sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), + ((vnx4sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); + TEST_UZP2 (vnx8hf, + ((vnx8hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, + 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c index 958dce4262d..74acc7983b8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c @@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m) /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ /* Should multiply by (257-1)*4 rather than (VF-1)*4. 
*/ -/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x[0-9]+, 10, 16} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 10\n} 2 } } */ /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ /* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ /* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c index 54d592d8ef1..f915e90b12e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c @@ -16,7 +16,7 @@ f (TYPE *x, TYPE *y, int n, int m) /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ /* Should multiply by (257-1)*4 rather than (VF-1)*4. */ -/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x[0-9]+, 10, 32} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tx[0-9]+, x[0-9]+, 10\n} 2 } } */ /* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ /* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */ /* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C index 9be09546c80..d0febc69533 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C @@ -3,10 +3,10 @@ #include <stdint.h> -typedef int8_t v32qi __attribute__((vector_size(32))); -typedef int16_t v16hi __attribute__((vector_size(32))); -typedef int32_t v8si __attribute__((vector_size(32))); -typedef int64_t v4di __attribute__((vector_size(32))); +typedef int8_t vnx16qi __attribute__((vector_size(32))); +typedef int16_t vnx8hi __attribute__((vector_size(32))); +typedef int32_t vnx4si __attribute__((vector_size(32))); +typedef int64_t vnx2di __attribute__((vector_size(32))); typedef uint8_t v32qu __attribute__((vector_size(32))); typedef 
uint16_t v16hu __attribute__((vector_size(32))); @@ -30,10 +30,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \ } #define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ - T (v32qi, COND, SUFFIX) \ - T (v16hi, COND, SUFFIX) \ - T (v8si, COND, SUFFIX) \ - T (v4di, COND, SUFFIX) + T (vnx16qi, COND, SUFFIX) \ + T (vnx8hi, COND, SUFFIX) \ + T (vnx4si, COND, SUFFIX) \ + T (vnx2di, COND, SUFFIX) #define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ T (v32qu, COND, SUFFIX) \ @@ -54,10 +54,10 @@ TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \ TEST_COND_VAR_ALL (T, !=, ne) #define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \ - T (v32qi, COND, IMM, SUFFIX) \ - T (v16hi, COND, IMM, SUFFIX) \ - T (v8si, COND, IMM, SUFFIX) \ - T (v4di, COND, IMM, SUFFIX) + T (vnx16qi, COND, IMM, SUFFIX) \ + T (vnx8hi, COND, IMM, SUFFIX) \ + T (vnx4si, COND, IMM, SUFFIX) \ + T (vnx2di, COND, IMM, SUFFIX) #define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \ T (v32qu, COND, IMM, SUFFIX) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c index 3b7c3e75775..d94cbb37b6a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c @@ -1,57 +1,41 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include <stdint.h> #include <stdbool.h> -#define VEC_BOOL_CMPNE(VARTYPE, INDUCTYPE) \ -void \ -vec_bool_cmpne##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \ - INDUCTYPE start, INDUCTYPE n, \ - INDUCTYPE mask) \ +#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \ +void __attribute__ ((noinline, noclone)) \ +vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \ + INDUCTYPE start, \ + INDUCTYPE n, \ + INDUCTYPE mask) \ { \ - INDUCTYPE i; \ - for (i = 0; i < n; i++) \ + for (INDUCTYPE i = 0; i < n; i++) \ { 
\ bool lhs = i >= start; \ bool rhs = (i & mask) != 0x3D; \ - if (lhs != rhs) \ + if (lhs OP rhs) \ dst[i] = src[i]; \ } \ } -#define VEC_BOOL_CMPEQ(VARTYPE, INDUCTYPE) \ -void \ -vec_bool_cmpeq##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \ - INDUCTYPE start, INDUCTYPE n, \ - INDUCTYPE mask) \ -{ \ - INDUCTYPE i; \ - for (i = 0; i < n; i++) \ - { \ - bool lhs = i >= start; \ - bool rhs = (i & mask) != 0x3D; \ - if (lhs == rhs) \ - dst[i] = src[i]; \ - } \ -} +#define TEST_OP(T, NAME, OP) \ + T (NAME, OP, uint8_t, uint8_t) \ + T (NAME, OP, uint16_t, uint16_t) \ + T (NAME, OP, uint32_t, uint32_t) \ + T (NAME, OP, uint64_t, uint64_t) \ + T (NAME, OP, float, uint32_t) \ + T (NAME, OP, double, uint64_t) -VEC_BOOL_CMPNE (uint8_t, uint8_t) -VEC_BOOL_CMPNE (uint16_t, uint16_t) -VEC_BOOL_CMPNE (uint32_t, uint32_t) -VEC_BOOL_CMPNE (uint64_t, uint64_t) -VEC_BOOL_CMPNE (float, uint32_t) -VEC_BOOL_CMPNE (double, uint64_t) +#define TEST_ALL(T) \ + TEST_OP (T, cmpeq, ==) \ + TEST_OP (T, cmpne, !=) -VEC_BOOL_CMPEQ (uint8_t, uint8_t) -VEC_BOOL_CMPEQ (uint16_t, uint16_t) -VEC_BOOL_CMPEQ (uint32_t, uint32_t) -VEC_BOOL_CMPEQ (uint64_t, uint64_t) -VEC_BOOL_CMPEQ (float, uint32_t) -VEC_BOOL_CMPEQ (double, uint64_t) +TEST_ALL (VEC_BOOL) -/* Both CMPNE and CMPEQ loops will contain an exclusive predicate or. */ +/* Both cmpne and cmpeq loops will contain an exclusive predicate or. */ /* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */ -/* CMPEQ will also contain a masked predicate not operation, which gets +/* cmpeq will also contain a masked predicate not operation, which gets folded to BIC. 
*/ /* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-7]/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c index 8c341c0e932..092aa386c60 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c @@ -3,32 +3,9 @@ #include "sve_vec_bool_cmp_1.c" -extern void abort (void); - #define N 103 -#define TEST_VEC_BOOL_CMPNE(VARTYPE,INDUCTYPE) \ -{ \ - INDUCTYPE i; \ - VARTYPE src[N]; \ - VARTYPE dst[N]; \ - for (i = 0; i < N; i++) \ - { \ - src[i] = i; \ - dst[i] = i * 2; \ - } \ - vec_bool_cmpne##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \ - for (i = 0; i < 13; i++) \ - if (dst[i] != i) \ - abort (); \ - for (i = 13; i < N; i++) \ - if (i != 0x3D && dst[i] != (i * 2)) \ - abort (); \ - else if (i == 0x3D && dst[i] != 0x3D) \ - abort (); \ -} - -#define TEST_VEC_BOOL_CMPEQ(VARTYPE,INDUCTYPE) \ +#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE) \ { \ INDUCTYPE i; \ VARTYPE src[N]; \ @@ -37,36 +14,24 @@ extern void abort (void); { \ src[i] = i; \ dst[i] = i * 2; \ + asm volatile ("" ::: "memory"); \ } \ - vec_bool_cmpeq##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \ + vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13, \ + 97, 0xFF); \ for (i = 0; i < 13; i++) \ - if (dst[i] != (i * 2)) \ - abort (); \ + if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2)) \ + __builtin_abort (); \ for (i = 13; i < 97; i++) \ - if (i != 0x3D && dst[i] != i) \ - abort (); \ - else if (i == 0x3D && dst[i] != (0x3D) * 2) \ - abort (); \ + if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? 
i : i * 2)) \ + __builtin_abort (); \ for (i = 97; i < N; i++) \ if (dst[i] != (i * 2)) \ - abort (); \ + __builtin_abort (); \ } -int main () +int __attribute__ ((optimize (1))) +main () { - TEST_VEC_BOOL_CMPNE (uint8_t, uint8_t); - TEST_VEC_BOOL_CMPNE (uint16_t, uint16_t); - TEST_VEC_BOOL_CMPNE (uint32_t, uint32_t); - TEST_VEC_BOOL_CMPNE (uint64_t, uint64_t); - TEST_VEC_BOOL_CMPNE (float, uint32_t); - TEST_VEC_BOOL_CMPNE (double, uint64_t); - - TEST_VEC_BOOL_CMPEQ (uint8_t, uint8_t); - TEST_VEC_BOOL_CMPEQ (uint16_t, uint16_t); - TEST_VEC_BOOL_CMPEQ (uint32_t, uint32_t); - TEST_VEC_BOOL_CMPEQ (uint64_t, uint64_t); - TEST_VEC_BOOL_CMPEQ (float, uint32_t); - TEST_VEC_BOOL_CMPEQ (double, uint64_t); - + TEST_ALL (TEST_VEC_BOOL) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c index 3d5b584e9e5..95b278e58f5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c @@ -1,10 +1,10 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef unsigned int v8si __attribute__ ((vector_size(32))); +typedef unsigned int vnx4si __attribute__ ((vector_size(32))); void -f (v8si *ptr, int x) +f (vnx4si *ptr, int x) { - *ptr += (v8si) { x, x, 1, 2, 3, x, x, 4 }; + *ptr += (vnx4si) { x, x, 1, 2, 3, x, x, 4 }; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c index ae8542f2c75..31283fcf424 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); 
-typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define VEC_PERM(TYPE, MASKTYPE) \ TYPE __attribute__ ((noinline, noclone)) \ @@ -18,13 +18,13 @@ vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask) \ return __builtin_shuffle (values1, values2, mask); \ } -VEC_PERM (v4di, v4di); -VEC_PERM (v8si, v8si); -VEC_PERM (v16hi, v16hi); -VEC_PERM (v32qi, v32qi); -VEC_PERM (v4df, v4di); -VEC_PERM (v8sf, v8si); -VEC_PERM (v16hf, v16hi); +VEC_PERM (vnx2di, vnx2di); +VEC_PERM (vnx4si, vnx4si); +VEC_PERM (vnx8hi, vnx8hi); +VEC_PERM (vnx16qi, vnx16qi); +VEC_PERM (vnx2df, vnx2di); +VEC_PERM (vnx4sf, vnx4si); +VEC_PERM (vnx8hf, vnx8hi); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c index 6ab82250d4c..1b98389d996 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c @@ -19,93 +19,93 @@ int main (void) { - TEST_VEC_PERM (v4di, v4di, - ((v4di) { 5, 36, 7, 48 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 }), - ((v4di) { 1 + (8 * 1), 6 + (8 * 3), - 3 + (8 * 1), 7 + (8 * 5) })); - TEST_VEC_PERM (v8si, v8si, - ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 
}), - ((v8si) { 9 + (16 * 2), 13 + (16 * 5), - 15 + (16 * 1), 7 + (16 * 0), - 6 + (16 * 8), 5 + (16 * 2), - 4 + (16 * 3), 10 + (16 * 2) })); - TEST_VEC_PERM (v16hi, v16hi, - ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34, - 7, 48, 3, 35, 9, 8, 7, 13 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 }), - ((v16hi) { 9 + (32 * 2), 13 + (32 * 2), - 15 + (32 * 8), 7 + (32 * 9), - 25 + (32 * 4), 26 + (32 * 3), - 27 + (32 * 1), 17 + (32 * 2), - 4 + (32 * 6), 31 + (32 * 7), - 0 + (32 * 8), 18 + (32 * 9), - 6 + (32 * 6), 5 + (32 * 7), - 4 + (32 * 2), 10 + (32 * 2) })); - TEST_VEC_PERM (v32qi, v32qi, - ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 12, 24, 36, 48, 12, 24, - 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 12, 24, 36, 48, 12, 24 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 }), - ((v32qi) { 5 + (64 * 3), 6 + (64 * 1), - 7 + (64 * 2), 8 + (64 * 1), - 9 + (64 * 3), 10 + (64 * 1), - 28 + (64 * 3), 29 + (64 * 3), - 30 + (64 * 1), 31 + (64 * 1), - 32 + (64 * 3), 33 + (64 * 2), - 54 + (64 * 2), 55 + (64 * 2), - 56 + (64 * 1), 61 + (64 * 2), - 5 + (64 * 2), 6 + (64 * 1), - 7 + (64 * 2), 8 + (64 * 2), - 9 + (64 * 2), 10 + (64 * 1), - 28 + (64 * 3), 29 + (64 * 1), - 30 + (64 * 3), 31 + (64 * 3), - 32 + (64 * 1), 33 + (64 * 1), - 54 + (64 * 2), 55 + (64 * 2), - 56 + (64 * 2), 61 + (64 * 2) })); - TEST_VEC_PERM (v4df, v4di, - ((v4df) { 5.1, 36.1, 7.1, 48.1 }), - ((v4df) { 4.1, 5.1, 6.1, 7.1 }), - ((v4df) { 12.1, 24.1, 36.1, 48.1 }), - ((v4di) { 1 + (8 * 3), 6 + (8 * 10), - 3 + (8 * 8), 7 + (8 * 2) })); - TEST_VEC_PERM (v8sf, v8si, - ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), - ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), - ((v8sf) { 33.2, 34.2, 
35.2, 36.2, - 37.2, 38.2, 39.2, 40.2 }), - ((v8si) { 9 + (16 * 1), 13 + (16 * 5), - 15 + (16 * 4), 7 + (16 * 4), - 6 + (16 * 3), 5 + (16 * 2), - 4 + (16 * 1), 10 + (16 * 0) })); - TEST_VEC_PERM (v16hf, v16hi, - ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, - 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), - ((v16hi) { 9 + (32 * 2), 13 + (32 * 2), - 15 + (32 * 8), 7 + (32 * 9), - 25 + (32 * 4), 26 + (32 * 3), - 27 + (32 * 1), 17 + (32 * 2), - 4 + (32 * 6), 31 + (32 * 7), - 0 + (32 * 8), 18 + (32 * 9), - 6 + (32 * 6), 5 + (32 * 7), - 4 + (32 * 2), 10 + (32 * 2) })); + TEST_VEC_PERM (vnx2di, vnx2di, + ((vnx2di) { 5, 36, 7, 48 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 }), + ((vnx2di) { 1 + (8 * 1), 6 + (8 * 3), + 3 + (8 * 1), 7 + (8 * 5) })); + TEST_VEC_PERM (vnx4si, vnx4si, + ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }), + ((vnx4si) { 9 + (16 * 2), 13 + (16 * 5), + 15 + (16 * 1), 7 + (16 * 0), + 6 + (16 * 8), 5 + (16 * 2), + 4 + (16 * 3), 10 + (16 * 2) })); + TEST_VEC_PERM (vnx8hi, vnx8hi, + ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34, + 7, 48, 3, 35, 9, 8, 7, 13 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 }), + ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2), + 15 + (32 * 8), 7 + (32 * 9), + 25 + (32 * 4), 26 + (32 * 3), + 27 + (32 * 1), 17 + (32 * 2), + 4 + (32 * 6), 31 + (32 * 7), + 0 + (32 * 8), 18 + (32 * 9), + 6 + (32 * 6), 5 + (32 * 7), + 4 + (32 * 2), 10 + (32 * 2) })); + TEST_VEC_PERM (vnx16qi, vnx16qi, + ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 
48, 12, 24 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 }), + ((vnx16qi) { 5 + (64 * 3), 6 + (64 * 1), + 7 + (64 * 2), 8 + (64 * 1), + 9 + (64 * 3), 10 + (64 * 1), + 28 + (64 * 3), 29 + (64 * 3), + 30 + (64 * 1), 31 + (64 * 1), + 32 + (64 * 3), 33 + (64 * 2), + 54 + (64 * 2), 55 + (64 * 2), + 56 + (64 * 1), 61 + (64 * 2), + 5 + (64 * 2), 6 + (64 * 1), + 7 + (64 * 2), 8 + (64 * 2), + 9 + (64 * 2), 10 + (64 * 1), + 28 + (64 * 3), 29 + (64 * 1), + 30 + (64 * 3), 31 + (64 * 3), + 32 + (64 * 1), 33 + (64 * 1), + 54 + (64 * 2), 55 + (64 * 2), + 56 + (64 * 2), 61 + (64 * 2) })); + TEST_VEC_PERM (vnx2df, vnx2di, + ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }), + ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }), + ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }), + ((vnx2di) { 1 + (8 * 3), 6 + (8 * 10), + 3 + (8 * 8), 7 + (8 * 2) })); + TEST_VEC_PERM (vnx4sf, vnx4si, + ((vnx4sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), + ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((vnx4sf) { 33.2, 34.2, 35.2, 36.2, + 37.2, 38.2, 39.2, 40.2 }), + ((vnx4si) { 9 + (16 * 1), 13 + (16 * 5), + 15 + (16 * 4), 7 + (16 * 4), + 6 + (16 * 3), 5 + (16 * 2), + 4 + (16 * 1), 10 + (16 * 0) })); + TEST_VEC_PERM (vnx8hf, vnx8hi, + ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((vnx8hi) { 9 + (32 * 2), 13 + (32 * 2), + 15 + (32 * 8), 7 + (32 * 9), + 25 + (32 * 4), 26 + (32 * 3), + 27 + (32 * 1), 17 + (32 * 2), + 4 + (32 * 6), 31 + (32 * 7), + 0 + (32 * 8), 18 + (32 * 9), + 6 + (32 * 6), 5 + (32 * 7), + 4 + (32 * 2), 
10 + (32 * 2) })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c index 4d46ff02192..a551ffa9b49 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c @@ -19,61 +19,61 @@ int main (void) { - TEST_VEC_PERM (v4di, v4di, - ((v4di) { 5, 36, 7, 48 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 }), - ((v4di) { 1, 6, 3, 7 })); - TEST_VEC_PERM (v8si, v8si, - ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }), - ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); - TEST_VEC_PERM (v16hi, v16hi, - ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34, - 7, 48, 3, 35, 9, 8, 7, 13 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 }), - ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, - 4, 31, 0, 18, 6, 5, 4, 10 })); - TEST_VEC_PERM (v32qi, v32qi, - ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 12, 24, 36, 48, 12, 24, - 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 12, 24, 36, 48, 12, 24 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 }), - ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29, - 30, 31, 32, 33, 54, 55, 56, 61, - 5, 6, 7, 8, 9, 10, 28, 29, - 30, 31, 32, 33, 54, 55, 56, 61 })); - TEST_VEC_PERM (v4df, v4di, - ((v4df) { 5.1, 36.1, 7.1, 48.1 }), - ((v4df) { 4.1, 5.1, 6.1, 7.1 }), - ((v4df) { 12.1, 24.1, 36.1, 48.1 }), - ((v4di) { 1, 6, 3, 7 })); - TEST_VEC_PERM (v8sf, v8si, - ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), - ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), - ((v8sf) { 33.2, 34.2, 35.2, 36.2, - 37.2, 38.2, 39.2, 40.2 }), - ((v8si) { 
9, 13, 15, 7, 6, 5, 4, 10 })); - TEST_VEC_PERM (v16hf, v16hi, - ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, - 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), - ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, - 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx2di, vnx2di, + ((vnx2di) { 5, 36, 7, 48 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 }), + ((vnx2di) { 1, 6, 3, 7 })); + TEST_VEC_PERM (vnx4si, vnx4si, + ((vnx4si) { 34, 38, 40, 10, 9, 8, 7, 35 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 }), + ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx8hi, vnx8hi, + ((vnx8hi) { 12, 16, 18, 10, 42, 43, 44, 34, + 7, 48, 3, 35, 9, 8, 7, 13 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 }), + ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx16qi, vnx16qi, + ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 }), + ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61, + 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61 })); + TEST_VEC_PERM (vnx2df, vnx2di, + ((vnx2df) { 5.1, 36.1, 7.1, 48.1 }), + ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }), + ((vnx2df) { 12.1, 24.1, 36.1, 48.1 }), + ((vnx2di) { 1, 6, 3, 7 })); + TEST_VEC_PERM (vnx4sf, vnx4si, + ((vnx4sf) { 
34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), + ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((vnx4sf) { 33.2, 34.2, 35.2, 36.2, + 37.2, 38.2, 39.2, 40.2 }), + ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx8hf, vnx8hi, + ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c index 31cff7ab113..4c3df975bab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_reverse_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ { \ for (int i = 0; i < n; ++i) \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c index 342b1ddb44d..9a9300509ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[N]; \ for (unsigned int i = 0; i < N; ++i) \ - b[i] = i * 2 + i % 5; \ + { \ + b[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_reverse_##TYPE (a, b, N); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -19,7 +22,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c index 4f70abd35e5..8b4901b1014 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_zip_##TYPE (TYPE *restrict a, TYPE *restrict b, \ TYPE *restrict c, long n) \ { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c index 14d66f99383..c47b4050ae2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c @@ -12,6 +12,7 @@ { \ b[i] = i * 2 + i % 5; \ c[i] = i * 3; \ + asm volatile ("" ::: "memory"); \ } \ vec_zip_##TYPE (a, b, c, N / 8); \ for (unsigned int i = 0; i < N / 2; ++i) \ @@ -23,7 +24,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c index 5fbd59f08bd..c08ad23868c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c @@ -4,7 +4,7 @@ #include <stdint.h> #define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ +TYPE __attribute__ ((noinline, noclone)) \ vec_uzp_##TYPE (TYPE *restrict a, TYPE *restrict b, \ TYPE *restrict c, long n) \ { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c index 404429208a0..a096b6c5353 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c @@ -9,7 +9,10 @@ { \ TYPE a[N], b[N], c[N]; \ for (unsigned int i = 0; i < N; ++i) \ - c[i] = i * 2 + i % 5; \ + { \ + c[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ vec_uzp_##TYPE (a, b, c, N / 8); \ for (unsigned int i = 0; i < N; ++i) \ { \ @@ -19,7 +22,7 @@ } \ } -int +int __attribute__ ((optimize (1))) main (void) { 
TEST_ALL (HARNESS) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c index e76b3bc5abb..7b470cb04e2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define VEC_PERM_CONST(TYPE, MASK) \ TYPE __attribute__ ((noinline, noclone)) \ @@ -18,18 +18,18 @@ vec_perm_##TYPE (TYPE values1, TYPE values2) \ return __builtin_shuffle (values1, values2, MASK); \ } -VEC_PERM_CONST (v4di, ((v4di) { 4, 3, 6, 1 })); -VEC_PERM_CONST (v8si, ((v8si) { 3, 9, 11, 12, 2, 4, 4, 2 })); -VEC_PERM_CONST (v16hi, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0, - 22, 1, 8, 9, 3, 24, 15, 1 })); -VEC_PERM_CONST (v32qi, ((v32qi) { 13, 31, 11, 2, 48, 28, 3, 4, - 54, 11, 30, 1, 0, 61, 2, 3, - 4, 5, 11, 63, 24, 11, 42, 39, - 2, 57, 22, 11, 6, 16, 18, 21 })); -VEC_PERM_CONST (v4df, ((v4di) { 7, 3, 2, 1 })); -VEC_PERM_CONST (v8sf, ((v8si) { 1, 9, 13, 11, 2, 5, 4, 2 })); -VEC_PERM_CONST (v16hf, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0, - 22, 1, 8, 9, 3, 24, 15, 1 })); +VEC_PERM_CONST (vnx2di, ((vnx2di) { 4, 3, 6, 1 })); 
+VEC_PERM_CONST (vnx4si, ((vnx4si) { 3, 9, 11, 12, 2, 4, 4, 2 })); +VEC_PERM_CONST (vnx8hi, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0, + 22, 1, 8, 9, 3, 24, 15, 1 })); +VEC_PERM_CONST (vnx16qi, ((vnx16qi) { 13, 31, 11, 2, 48, 28, 3, 4, + 54, 11, 30, 1, 0, 61, 2, 3, + 4, 5, 11, 63, 24, 11, 42, 39, + 2, 57, 22, 11, 6, 16, 18, 21 })); +VEC_PERM_CONST (vnx2df, ((vnx2di) { 7, 3, 2, 1 })); +VEC_PERM_CONST (vnx4sf, ((vnx4si) { 1, 9, 13, 11, 2, 5, 4, 2 })); +VEC_PERM_CONST (vnx8hf, ((vnx8hi) { 8, 27, 5, 4, 21, 12, 13, 0, + 22, 1, 8, 9, 3, 24, 15, 1 })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c index b4f82091f7c..d397c3d6670 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define VEC_PERM_CONST_OVERRUN(TYPE, MASK) \ TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE 
values2) \ @@ -17,50 +17,50 @@ TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \ return __builtin_shuffle (values1, values2, MASK); \ } -VEC_PERM_CONST_OVERRUN (v4di, ((v4di) { 4 + (8 * 1), 3 + (8 * 1), - 6 + (8 * 2), 1 + (8 * 3) })); -VEC_PERM_CONST_OVERRUN (v8si, ((v8si) { 3 + (16 * 3), 9 + (16 * 4), - 11 + (16 * 5), 12 + (16 * 3), - 2 + (16 * 2), 4 + (16 * 1), - 4 + (16 * 2), 2 + (16 * 1) })); -VEC_PERM_CONST_OVERRUN (v16hi, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1), - 5 + (32 * 3), 4 + (32 * 3), - 21 + (32 * 1), 12 + (32 * 3), - 13 + (32 * 3), 0 + (32 * 1), - 22 + (32 * 2), 1 + (32 * 2), - 8 + (32 * 2), 9 + (32 * 1), - 3 + (32 * 2), 24 + (32 * 2), - 15 + (32 * 1), 1 + (32 * 1) })); -VEC_PERM_CONST_OVERRUN (v32qi, ((v32qi) { 13 + (64 * 2), 31 + (64 * 2), - 11 + (64 * 2), 2 + (64 * 1), - 48 + (64 * 1), 28 + (64 * 2), - 3 + (64 * 2), 4 + (64 * 3), - 54 + (64 * 1), 11 + (64 * 2), - 30 + (64 * 2), 1 + (64 * 1), - 0 + (64 * 1), 61 + (64 * 2), - 2 + (64 * 3), 3 + (64 * 2), - 4 + (64 * 3), 5 + (64 * 3), - 11 + (64 * 3), 63 + (64 * 1), - 24 + (64 * 1), 11 + (64 * 3), - 42 + (64 * 3), 39 + (64 * 2), - 2 + (64 * 2), 57 + (64 * 3), - 22 + (64 * 3), 11 + (64 * 2), - 6 + (64 * 2), 16 + (64 * 2), - 18 + (64 * 2), 21 + (64 * 3) })); -VEC_PERM_CONST_OVERRUN (v4df, ((v4di) { 7 + (8 * 1), 3 + (8 * 3), - 2 + (8 * 5), 1 + (8 * 3) })); -VEC_PERM_CONST_OVERRUN (v8sf, ((v8si) { 1 + (16 * 1), 9 + (16 * 2), - 13 + (16 * 2), 11 + (16 * 3), - 2 + (16 * 2), 5 + (16 * 2), - 4 + (16 * 4), 2 + (16 * 3) })); -VEC_PERM_CONST_OVERRUN (v16hf, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1), - 5 + (32 * 3), 4 + (32 * 3), - 21 + (32 * 1), 12 + (32 * 3), - 13 + (32 * 3), 0 + (32 * 1), - 22 + (32 * 2), 1 + (32 * 2), - 8 + (32 * 2), 9 + (32 * 1), - 3 + (32 * 2), 24 + (32 * 2), - 15 + (32 * 1), 1 + (32 * 1) })); +VEC_PERM_CONST_OVERRUN (vnx2di, ((vnx2di) { 4 + (8 * 1), 3 + (8 * 1), + 6 + (8 * 2), 1 + (8 * 3) })); +VEC_PERM_CONST_OVERRUN (vnx4si, ((vnx4si) { 3 + (16 * 3), 9 + (16 * 4), + 11 + (16 * 5), 12 
+ (16 * 3), + 2 + (16 * 2), 4 + (16 * 1), + 4 + (16 * 2), 2 + (16 * 1) })); +VEC_PERM_CONST_OVERRUN (vnx8hi, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1), + 5 + (32 * 3), 4 + (32 * 3), + 21 + (32 * 1), 12 + (32 * 3), + 13 + (32 * 3), 0 + (32 * 1), + 22 + (32 * 2), 1 + (32 * 2), + 8 + (32 * 2), 9 + (32 * 1), + 3 + (32 * 2), 24 + (32 * 2), + 15 + (32 * 1), 1 + (32 * 1) })); +VEC_PERM_CONST_OVERRUN (vnx16qi, ((vnx16qi) { 13 + (64 * 2), 31 + (64 * 2), + 11 + (64 * 2), 2 + (64 * 1), + 48 + (64 * 1), 28 + (64 * 2), + 3 + (64 * 2), 4 + (64 * 3), + 54 + (64 * 1), 11 + (64 * 2), + 30 + (64 * 2), 1 + (64 * 1), + 0 + (64 * 1), 61 + (64 * 2), + 2 + (64 * 3), 3 + (64 * 2), + 4 + (64 * 3), 5 + (64 * 3), + 11 + (64 * 3), 63 + (64 * 1), + 24 + (64 * 1), 11 + (64 * 3), + 42 + (64 * 3), 39 + (64 * 2), + 2 + (64 * 2), 57 + (64 * 3), + 22 + (64 * 3), 11 + (64 * 2), + 6 + (64 * 2), 16 + (64 * 2), + 18 + (64 * 2), 21 + (64 * 3) })); +VEC_PERM_CONST_OVERRUN (vnx2df, ((vnx2di) { 7 + (8 * 1), 3 + (8 * 3), + 2 + (8 * 5), 1 + (8 * 3) })); +VEC_PERM_CONST_OVERRUN (vnx4sf, ((vnx4si) { 1 + (16 * 1), 9 + (16 * 2), + 13 + (16 * 2), 11 + (16 * 3), + 2 + (16 * 2), 5 + (16 * 2), + 4 + (16 * 4), 2 + (16 * 3) })); +VEC_PERM_CONST_OVERRUN (vnx8hf, ((vnx8hi) { 8 + (32 * 3), 27 + (32 * 1), + 5 + (32 * 3), 4 + (32 * 3), + 21 + (32 * 1), 12 + (32 * 3), + 13 + (32 * 3), 0 + (32 * 1), + 22 + (32 * 2), 1 + (32 * 2), + 8 + (32 * 2), 9 + (32 * 1), + 3 + (32 * 2), 24 + (32 * 2), + 15 + (32 * 1), 1 + (32 * 1) })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c index 7324c1da0a4..a0214880dbe 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c @@ -22,49 +22,49 @@ int 
main (void) { - TEST_VEC_PERM (v4di, - ((v4di) { 12, 7, 36, 5 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 })); - TEST_VEC_PERM (v8si, - ((v8si) { 6, 34, 36, 37, 5, 7, 7, 5 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); - TEST_VEC_PERM (v16hi, - ((v16hi) { 11, 44, 8, 7, 38, 15, 16, 3, - 39, 4, 11, 12, 6, 41, 18, 4 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 })); - TEST_VEC_PERM (v32qi, - ((v32qi) { 5, 7, 7, 6, 12, 4, 7, 4, - 36, 7, 6, 5, 4, 24, 6, 7, - 4, 5, 7, 48, 4, 7, 36, 48, - 6, 24, 6, 7, 6, 4, 6, 5 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 })); - TEST_VEC_PERM (v4df, - ((v4df) { 48.5, 7.5, 6.5, 5.5 }), - ((v4df) { 4.5, 5.5, 6.5, 7.5 }), - ((v4df) { 12.5, 24.5, 36.5, 48.5 })); - TEST_VEC_PERM (v8sf, - ((v8sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }), - ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), - ((v8sf) { 33.5, 34.5, 35.5, 36.5, - 37.5, 38.5, 39.5, 40.5 })); - TEST_VEC_PERM (v16hf, - ((v16hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0, - 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, - 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + TEST_VEC_PERM (vnx2di, + ((vnx2di) { 12, 7, 36, 5 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 })); + TEST_VEC_PERM (vnx4si, + ((vnx4si) { 6, 34, 36, 37, 5, 7, 7, 5 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_VEC_PERM (vnx8hi, + ((vnx8hi) { 11, 44, 8, 7, 38, 15, 16, 3, + 39, 
4, 11, 12, 6, 41, 18, 4 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_VEC_PERM (vnx16qi, + ((vnx16qi) { 5, 7, 7, 6, 12, 4, 7, 4, + 36, 7, 6, 5, 4, 24, 6, 7, + 4, 5, 7, 48, 4, 7, 36, 48, + 6, 24, 6, 7, 6, 4, 6, 5 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_VEC_PERM (vnx2df, + ((vnx2df) { 48.5, 7.5, 6.5, 5.5 }), + ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }), + ((vnx2df) { 12.5, 24.5, 36.5, 48.5 })); + TEST_VEC_PERM (vnx4sf, + ((vnx4sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }), + ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), + ((vnx4sf) { 33.5, 34.5, 35.5, 36.5, + 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (vnx8hf, + ((vnx8hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0, + 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c index a4efb4fea79..beabf272f11 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf 
__attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define VEC_PERM_SINGLE(TYPE, MASK) \ TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \ @@ -17,18 +17,18 @@ TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \ return __builtin_shuffle (values1, values2, MASK); \ } -VEC_PERM_SINGLE (v4di, ((v4di) { 0, 3, 2, 1 })); -VEC_PERM_SINGLE (v8si, ((v8si) { 3, 7, 1, 0, 2, 4, 4, 2 })); -VEC_PERM_SINGLE (v16hi, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0, - 1, 1, 8, 9, 3, 14, 15, 1 })); -VEC_PERM_SINGLE (v32qi, ((v32qi) { 13, 21, 11, 2, 8, 28, 3, 4, - 14, 11, 30, 1, 0, 31, 2, 3, - 4, 5, 11, 23, 24, 11, 12, 9, - 2, 7, 22, 11, 6, 16, 18, 21 })); -VEC_PERM_SINGLE (v4df, ((v4di) { 3, 3, 1, 1 })); -VEC_PERM_SINGLE (v8sf, ((v8si) { 4, 5, 6, 0, 2, 7, 4, 2 })); -VEC_PERM_SINGLE (v16hf, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0, - 1, 1, 8, 9, 3, 14, 15, 1 })); +VEC_PERM_SINGLE (vnx2di, ((vnx2di) { 0, 3, 2, 1 })); +VEC_PERM_SINGLE (vnx4si, ((vnx4si) { 3, 7, 1, 0, 2, 4, 4, 2 })); +VEC_PERM_SINGLE (vnx8hi, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0, + 1, 1, 8, 9, 3, 14, 15, 1 })); +VEC_PERM_SINGLE (vnx16qi, ((vnx16qi) { 13, 21, 11, 2, 8, 28, 3, 4, + 14, 11, 30, 1, 0, 31, 2, 3, + 4, 5, 11, 23, 24, 11, 12, 9, + 2, 7, 22, 11, 6, 16, 18, 21 })); +VEC_PERM_SINGLE (vnx2df, ((vnx2di) { 3, 3, 1, 1 })); +VEC_PERM_SINGLE (vnx4sf, ((vnx4si) { 4, 5, 6, 0, 2, 7, 4, 2 })); +VEC_PERM_SINGLE (vnx8hf, ((vnx8hi) { 8, 7, 5, 4, 11, 12, 13, 0, + 1, 1, 8, 9, 3, 14, 15, 1 })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final 
{ scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c index fbae30c8d1c..aa443563182 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c @@ -17,49 +17,49 @@ int main (void) { - TEST_VEC_PERM (v4di, - ((v4di) { 4, 7, 6, 5 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 12, 24, 36, 48 })); - TEST_VEC_PERM (v8si, - ((v8si) { 6, 10, 4, 3, 5, 7, 7, 5 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); - TEST_VEC_PERM (v16hi, - ((v16hi) { 11, 10, 8, 7, 14, 15, 16, 3, - 4, 4, 11, 12, 6, 17, 18, 4 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48 })); - TEST_VEC_PERM (v32qi, - ((v32qi) { 5, 5, 7, 6, 4, 4, 7, 4, - 6, 7, 6, 5, 4, 7, 6, 7, - 4, 5, 7, 7, 4, 7, 4, 5, - 6, 7, 6, 7, 6, 4, 6, 5 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48, - 12, 24, 36, 48, 12, 24, 36, 48 })); - TEST_VEC_PERM (v4df, - ((v4df) { 7.5, 7.5, 5.5, 5.5 }), - ((v4df) { 4.5, 5.5, 6.5, 7.5 }), - ((v4df) { 12.5, 24.5, 36.5, 48.5 })); - TEST_VEC_PERM (v8sf, - ((v8sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }), - ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), - ((v8sf) { 33.5, 34.5, 35.5, 36.5, - 37.5, 38.5, 39.5, 40.5 })); - TEST_VEC_PERM (v16hf, - ((v16hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0, - 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 
39.0, 40.0, - 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + TEST_VEC_PERM (vnx2di, + ((vnx2di) { 4, 7, 6, 5 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 12, 24, 36, 48 })); + TEST_VEC_PERM (vnx4si, + ((vnx4si) { 6, 10, 4, 3, 5, 7, 7, 5 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_VEC_PERM (vnx8hi, + ((vnx8hi) { 11, 10, 8, 7, 14, 15, 16, 3, + 4, 4, 11, 12, 6, 17, 18, 4 }), + ((vnx8hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_VEC_PERM (vnx16qi, + ((vnx16qi) { 5, 5, 7, 6, 4, 4, 7, 4, + 6, 7, 6, 5, 4, 7, 6, 7, + 4, 5, 7, 7, 4, 7, 4, 5, + 6, 7, 6, 7, 6, 4, 6, 5 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_VEC_PERM (vnx2df, + ((vnx2df) { 7.5, 7.5, 5.5, 5.5 }), + ((vnx2df) { 4.5, 5.5, 6.5, 7.5 }), + ((vnx2df) { 12.5, 24.5, 36.5, 48.5 })); + TEST_VEC_PERM (vnx4sf, + ((vnx4sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }), + ((vnx4sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), + ((vnx4sf) { 33.5, 34.5, 35.5, 36.5, + 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (vnx8hf, + ((vnx8hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0, + 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c index a82b57dc378..c4abc2de551 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define VEC_PERM(TYPE, MASKTYPE) \ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \ @@ -17,13 +17,13 @@ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \ return __builtin_shuffle (values, mask); \ } -VEC_PERM (v4di, v4di) -VEC_PERM (v8si, v8si) -VEC_PERM (v16hi, v16hi) -VEC_PERM (v32qi, v32qi) -VEC_PERM (v4df, v4di) -VEC_PERM (v8sf, v8si) -VEC_PERM (v16hf, v16hi) +VEC_PERM (vnx2di, vnx2di) +VEC_PERM (vnx4si, vnx4si) +VEC_PERM (vnx8hi, vnx8hi) +VEC_PERM (vnx16qi, vnx16qi) +VEC_PERM (vnx2df, vnx2di) +VEC_PERM (vnx4sf, vnx4si) +VEC_PERM (vnx8hf, vnx8hi) /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c index 539c99d4f61..fd73bc9652f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c @@ -18,48 +18,48 @@ extern void abort 
(void); int main (void) { - TEST_VEC_PERM (v4di, v4di, - ((v4di) { 5, 6, 7, 5 }), - ((v4di) { 4, 5, 6, 7 }), - ((v4di) { 1, 6, 3, 5 })); - TEST_VEC_PERM (v8si, v8si, - ((v8si) { 4, 8, 10, 10, 9, 8, 7, 5 }), - ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), - ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); - TEST_VEC_PERM (v16hi, v16hi, - ((v16hi) { 12, 16, 18, 10, 12, 13, 14, 4, - 7, 18, 3, 5, 9, 8, 7, 13 }), - ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18 }), - ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, - 4, 31, 0, 18, 6, 5, 4, 10 })); - TEST_VEC_PERM (v32qi, v32qi, - ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 4, 5, 6, 7, 4, 5, - 5, 6, 7, 4, 5, 6, 4, 5, - 6, 7, 4, 5, 6, 7, 4, 5 }), - ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7, - 4, 5, 6, 7, 4, 5, 6, 7 }), - ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29, - 30, 31, 32, 33, 54, 55, 56, 61, - 5, 6, 7, 8, 9, 10, 28, 29, - 30, 31, 32, 33, 54, 55, 56, 61 })); - TEST_VEC_PERM (v4df, v4di, - ((v4df) { 5.1, 6.1, 7.1, 5.1 }), - ((v4df) { 4.1, 5.1, 6.1, 7.1 }), - ((v4di) { 1, 6, 3, 5 })); - TEST_VEC_PERM (v8sf, v8si, - ((v8sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }), - ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), - ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); - TEST_VEC_PERM (v16hf, v16hi, - ((v16hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0, - 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }), - ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, - 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), - ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, - 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx2di, vnx2di, + ((vnx2di) { 5, 6, 7, 5 }), + ((vnx2di) { 4, 5, 6, 7 }), + ((vnx2di) { 1, 6, 3, 5 })); + TEST_VEC_PERM (vnx4si, vnx4si, + ((vnx4si) { 4, 8, 10, 10, 9, 8, 7, 5 }), + ((vnx4si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx8hi, vnx8hi, + ((vnx8hi) { 12, 16, 18, 10, 12, 13, 14, 4, + 7, 18, 3, 5, 9, 8, 7, 13 }), + ((vnx8hi) { 3, 
4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx16qi, vnx16qi, + ((vnx16qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 4, 5, 6, 7, 4, 5, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 4, 5, 6, 7, 4, 5 }), + ((vnx16qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((vnx16qi) { 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61, + 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61 })); + TEST_VEC_PERM (vnx2df, vnx2di, + ((vnx2df) { 5.1, 6.1, 7.1, 5.1 }), + ((vnx2df) { 4.1, 5.1, 6.1, 7.1 }), + ((vnx2di) { 1, 6, 3, 5 })); + TEST_VEC_PERM (vnx4sf, vnx4si, + ((vnx4sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }), + ((vnx4sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((vnx4si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (vnx8hf, vnx8hi, + ((vnx8hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0, + 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }), + ((vnx8hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((vnx8hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c index c54db87fa21..2a268a447e3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c @@ -3,13 +3,13 @@ #include <stdint.h> -#define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ -vec_while_##TYPE (TYPE *restrict a, int n) \ -{ \ - for (int i = 0; i < n; ++i) \ - a[i] += 1; \ -} +#define ADD_LOOP(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + vec_while_##TYPE (TYPE *restrict a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i] += 1; \ + } #define TEST_ALL(T) \ T (int8_t) \ @@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, int n) \ T (float) \ T (double) -TEST_ALL (VEC_PERM) +TEST_ALL 
(ADD_LOOP) /* { dg-final { scan-assembler-not {\tuqdec} } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c index 62f82cc43f4..2f0f0f49e12 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c @@ -3,13 +3,13 @@ #include <stdint.h> -#define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ -vec_while_##TYPE (TYPE *restrict a, unsigned int n) \ -{ \ - for (unsigned int i = 0; i < n; ++i) \ - a[i] += 1; \ -} +#define ADD_LOOP(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + vec_while_##TYPE (TYPE *restrict a, unsigned int n) \ + { \ + for (unsigned int i = 0; i < n; ++i) \ + a[i] += 1; \ + } #define TEST_ALL(T) \ T (int8_t) \ @@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned int n) \ T (float) \ T (double) -TEST_ALL (VEC_PERM) +TEST_ALL (ADD_LOOP) /* { dg-final { scan-assembler-not {\tuqdec} } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c index ace7ebc5a0f..026a8195238 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c @@ -3,13 +3,13 @@ #include <stdint.h> -#define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ -vec_while_##TYPE (TYPE *restrict a, long n) \ -{ \ - for (long i = 0; i < n; ++i) \ - a[i] += 1; \ -} +#define ADD_LOOP(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + vec_while_##TYPE (TYPE *restrict a, int64_t n) \ + { \ + for (int64_t i = 0; i < n; ++i) \ + a[i] += 1; \ + } #define TEST_ALL(T) \ T (int8_t) \ @@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, long n) \ T (float) \ T (double) -TEST_ALL (VEC_PERM) +TEST_ALL (ADD_LOOP) /* { dg-final { scan-assembler-not {\tuqdec} } } */ /* { dg-final { scan-assembler-times 
{\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c index 0717eac1ff6..d71b141b431 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c @@ -3,13 +3,13 @@ #include <stdint.h> -#define VEC_PERM(TYPE) \ -TYPE __attribute__ ((weak)) \ -vec_while_##TYPE (TYPE *restrict a, unsigned long n) \ -{ \ - for (unsigned long i = 0; i < n; ++i) \ - a[i] += 1; \ -} +#define ADD_LOOP(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + vec_while_##TYPE (TYPE *restrict a, uint64_t n) \ + { \ + for (uint64_t i = 0; i < n; ++i) \ + a[i] += 1; \ + } #define TEST_ALL(T) \ T (int8_t) \ @@ -23,7 +23,7 @@ vec_while_##TYPE (TYPE *restrict a, unsigned long n) \ T (float) \ T (double) -TEST_ALL (VEC_PERM) +TEST_ALL (ADD_LOOP) /* { dg-final { scan-assembler-times {\tuqdec} 2 } } */ /* { dg-final { scan-assembler-times {\tuqdecb\tx[0-9]+} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c deleted file mode 100644 index ead821b43ca..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -int -loop (short b) -{ - int c = 0; -l1: - b++; - c |= b; - if (b) - goto l1; - return c; -} - -/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c deleted file mode 100644 index 1a3502a0f94..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -int -loop (short b) -{ - int c = 0; -l1: - b++; - c |= b; - if (b < 32767) - goto l1; -return c; -} - -/* { dg-final 
{ scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c deleted file mode 100644 index 125fc31a464..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c +++ /dev/null @@ -1,18 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ - -int -loop (short b) -{ - int c = 0; -l1: - b++; - c |= b; - if (b < 32766) - goto l1; -return c; -} - -/* { dg-final { scan-assembler-not {\tmov\tx[0-9], 65536\n} } } */ -/* { dg-final { scan-assembler-not {\tcmp\tx[0-9], 0\n} } } */ -/* { dg-final { scan-assembler-not {\tcsel\tx[0-9], x[0-9], x[0-9], ne\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c index 918313f62bd..c84b88a2e70 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c @@ -7,13 +7,13 @@ #include <stdint.h> -typedef int64_t v4di __attribute__((vector_size (32))); -typedef int32_t v8si __attribute__((vector_size (32))); -typedef int16_t v16hi __attribute__((vector_size (32))); -typedef int8_t v32qi __attribute__((vector_size (32))); -typedef double v4df __attribute__((vector_size (32))); -typedef float v8sf __attribute__((vector_size (32))); -typedef _Float16 v16hf __attribute__((vector_size (32))); +typedef int64_t vnx2di __attribute__((vector_size (32))); +typedef int32_t vnx4si __attribute__((vector_size (32))); +typedef int16_t vnx8hi __attribute__((vector_size (32))); +typedef int8_t vnx16qi __attribute__((vector_size (32))); +typedef double vnx2df __attribute__((vector_size (32))); +typedef float vnx4sf __attribute__((vector_size (32))); +typedef _Float16 vnx8hf __attribute__((vector_size (32))); #define MASK_2(X, Y) X, Y + X #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y) @@ -21,10 +21,10 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); 
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 4, Y) #define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 8, Y) -#define INDEX_4 v4di -#define INDEX_8 v8si -#define INDEX_16 v16hi -#define INDEX_32 v32qi +#define INDEX_4 vnx2di +#define INDEX_8 vnx4si +#define INDEX_16 vnx8hi +#define INDEX_32 vnx16qi #define PERMUTE(TYPE, NUNITS) \ TYPE permute_##TYPE (TYPE values1, TYPE values2) \ @@ -36,13 +36,13 @@ typedef _Float16 v16hf __attribute__((vector_size (32))); } #define TEST_ALL(T) \ - T (v4di, 4) \ - T (v8si, 8) \ - T (v16hi, 16) \ - T (v32qi, 32) \ - T (v4df, 4) \ - T (v8sf, 8) \ - T (v16hf, 16) + T (vnx2di, 4) \ + T (vnx4si, 8) \ + T (vnx8hi, 16) \ + T (vnx16qi, 32) \ + T (vnx2df, 4) \ + T (vnx4sf, 8) \ + T (vnx8hf, 16) TEST_ALL (PERMUTE) diff --git a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c index c7c15ee5c4a..aecf8262706 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c @@ -49,4 +49,6 @@ f12 (void) return sum; } -/* { dg-final { scan-assembler-not "sp" } } */ +/* Fails for fixed-length SVE because we lack a vec_init pattern. + A later patch fixes this in generic code. */ +/* { dg-final { scan-assembler-not "sp" { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ |