diff options
Diffstat (limited to 'gcc/testsuite/gcc.target/aarch64')
163 files changed, 3469 insertions, 2974 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 00000000000..b7378adf8ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,73 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include <arm_neon.h> + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. 
*/ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c new file mode 100644 index 00000000000..3e7cd6c2fc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-exec.c @@ -0,0 +1,81 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do run { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ +/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw } */ + +#include <arm_neon.h> + +extern void abort(); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define ORDER(x, y) y +#else +# define ORDER(x, y) x - y +#endif + +#define P(n1,n2) n1,n1,n1,n1,n2,n2,n2,n2 +#define ARR(nm, p, ty, ...) 
ty nm##_##p = { __VA_ARGS__ } +#define TEST(t1, t2, t3, f, r1, r2, n1, n2) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##r = {0}; \ + f##_##r = f (f##_##r, f##_##x, f##_##y); \ + if (f##_##r[0] != n1 || f##_##r[1] != n2) \ + abort (); + +#define TEST_LANE(t1, t2, t3, f, r1, r2, n1, n2, n3, n4) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (1, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (1, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); + +#define Px(n1,n2,n3,n4) P(n1,n2),P(n3,n4) +#define TEST_LANEQ(t1, t2, t3, f, r1, r2, n1, n2, n3, n4, n5, n6, n7, n8) \ + ARR(f, x, t1, r1); \ + ARR(f, y, t2, r2); \ + t3 f##_##rx = {0}; \ + f##_##rx = f (f##_##rx, f##_##x, f##_##y, ORDER (3, 0)); \ + if (f##_##rx[0] != n1 || f##_##rx[1] != n2) \ + abort (); \ + t3 f##_##rx1 = {0}; \ + f##_##rx1 = f (f##_##rx1, f##_##x, f##_##y, ORDER (3, 1)); \ + if (f##_##rx1[0] != n3 || f##_##rx1[1] != n4) \ + abort (); \ + t3 f##_##rx2 = {0}; \ + f##_##rx2 = f (f##_##rx2, f##_##x, f##_##y, ORDER (3, 2)); \ + if (f##_##rx2[0] != n5 || f##_##rx2[1] != n6) \ + abort (); \ + t3 f##_##rx3 = {0}; \ + f##_##rx3 = f (f##_##rx3, f##_##x, f##_##y, ORDER (3, 3)); \ + if (f##_##rx3[0] != n7 || f##_##rx3[1] != n8) \ + abort (); + +int +main() +{ + TEST (uint8x8_t, uint8x8_t, uint32x2_t, vdot_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x8_t, int8x8_t, int32x2_t, vdot_s32, P(1,2), P(-2,-3), -8, -24); + + TEST (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_u32, P(1,2), P(2,3), 8, 24); + TEST (int8x16_t, int8x16_t, int32x4_t, vdotq_s32, P(1,2), P(-2,-3), -8, -24); + + TEST_LANE (uint8x8_t, uint8x8_t, uint32x2_t, vdot_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x8_t, int8x8_t, int32x2_t, vdot_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANE (uint8x16_t, uint8x8_t, uint32x4_t, 
vdotq_lane_u32, P(1,2), P(2,3), 8, 16, 12, 24); + TEST_LANE (int8x16_t, int8x8_t, int32x4_t, vdotq_lane_s32, P(1,2), P(-2,-3), -8, -16, -12, -24); + + TEST_LANEQ (uint8x8_t, uint8x16_t, uint32x2_t, vdot_laneq_u32, P(1,2), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x8_t, int8x16_t, int32x2_t, vdot_laneq_s32, P(1,2), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + TEST_LANEQ (uint8x16_t, uint8x16_t, uint32x4_t, vdotq_laneq_u32, Px(1,2,2,1), Px(2,3,1,4), 8, 16, 12, 24, 4, 8, 16, 32); + TEST_LANEQ (int8x16_t, int8x16_t, int32x4_t, vdotq_laneq_s32, Px(1,2,2,1), Px(-2,-3,-1,-4), -8, -16, -12, -24, -4, -8, -16, -32); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 00000000000..90b00aff95c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i<len; i++) { + prod = X[i] * Y[i]; + result += prod; + } + return result; +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c new file mode 100644 index 00000000000..57b5ef82f85 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE signed + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {sdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c new file mode 100644 index 00000000000..b2cef318500 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#define N 64 +#define TYPE unsigned + +#include "vect-dot-qi.h" + +/* { dg-final { scan-assembler-times {udot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/cmpelim_mult_uses_1.c b/gcc/testsuite/gcc.target/aarch64/cmpelim_mult_uses_1.c new file mode 100644 index 00000000000..953c388037f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/cmpelim_mult_uses_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* X is both compared against zero and used. Make sure we can still + generate an ADDS and avoid an explicit comparison against zero. 
*/ + +int +foo (int x, int y) +{ + x += y; + if (x != 0) + x = x + 2; + return x; +} + +/* { dg-final { scan-assembler-times "adds\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, 0" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/fix_trunc1.c b/gcc/testsuite/gcc.target/aarch64/fix_trunc1.c new file mode 100644 index 00000000000..0441458f635 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fix_trunc1.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +float +f1 (float x) +{ + int y = x; + + return (float) y; +} + +double +f2 (double x) +{ + long y = x; + + return (double) y; +} + +/* { dg-final { scan-assembler "fcvtzs\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzs\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, d\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c index 6080e186d8f..bd0c73c8d34 100644 --- a/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c +++ b/gcc/testsuite/gcc.target/aarch64/inline-lrint_2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target ilp32 } */ -/* { dg-options "-O3 -fno-math-errno" } */ +/* { dg-options "-O3 -fno-math-errno -fno-trapping-math" } */ #include "lrint-matherr.h" diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c new file mode 100644 index 00000000000..1e46755a39a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +/* Check that we split unaligned LDP/STP into base and aligned offset. 
*/ + +typedef struct +{ + int a, b, c, d, e; +} S; + +void foo (S *); + +void test (int x) +{ + S s = { .a = x }; + foo (&s); +} + +/* { dg-final { scan-assembler-not "mov\tx\[0-9\]+, sp" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr78733.c b/gcc/testsuite/gcc.target/aarch64/pr78733.c index ce462cedf9f..3cdb3ba7373 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr78733.c +++ b/gcc/testsuite/gcc.target/aarch64/pr78733.c @@ -7,4 +7,5 @@ t (void) return (__int128)1 << 80; } -/* { dg-final { scan-assembler "adr" } } */ +/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */ +/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr79041-2.c b/gcc/testsuite/gcc.target/aarch64/pr79041-2.c index a889dfdd895..62856f10438 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr79041-2.c +++ b/gcc/testsuite/gcc.target/aarch64/pr79041-2.c @@ -8,5 +8,6 @@ t (void) return (__int128)1 << 80; } -/* { dg-final { scan-assembler "adr" } } */ +/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */ +/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */ /* { dg-final { scan-assembler-not "adrp" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr80295.c b/gcc/testsuite/gcc.target/aarch64/pr80295.c new file mode 100644 index 00000000000..b3866d8d6a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr80295.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-mabi=ilp32" } */ + +void f (void *b) +{ + __builtin_update_setjmp_buf (b); +} + diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c index ccfe417e644..f57e0c54632 100644 --- a/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c +++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c @@ -4,6 +4,6 @@ __attribute__((target ("arch=armv8-a-typo"))) void foo () { /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'armv8-a'?" 
"" { target *-*-* } .-1 } */ - /* { dg-error "unknown value 'armv8-a-typo' for 'arch' target attribute" "" { target *-*-* } .-2 } */ - /* { dg-error "target attribute 'arch=armv8-a-typo' is invalid" "" { target *-*-* } .-3 } */ + /* { dg-error "invalid name \\(\"armv8-a-typo\"\\) in 'target\\(\"arch=\"\\)' pragma or attribute" "" { target *-*-* } .-2 } */ + /* { dg-error "pragma or attribute 'target\\(\"arch=armv8-a-typo\"\\)' is not valid" "" { target *-*-* } .-3 } */ } diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c index 42ba51a7226..70096f89e0b 100644 --- a/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c +++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c @@ -3,7 +3,7 @@ __attribute__((target ("cpu=cortex-a57-typo"))) void foo () { - /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" "" { target *-*-* } .-1 } */ - /* { dg-error "unknown value 'cortex-a57-typo' for 'cpu' target attribute" "" { target *-*-* } .-2 } */ - /* { dg-error "target attribute 'cpu=cortex-a57-typo' is invalid" "" { target *-*-* } .-3 } */ + /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57'?" "" { target *-*-* } .-1 } */ + /* { dg-error "invalid name \\(\"cortex-a57-typo\"\\) in 'target\\(\"cpu=\"\\)' pragma or attribute" "" { target *-*-* } .-2 } */ + /* { dg-error "pragma or attribute 'target\\(\"cpu=cortex-a57-typo\"\\)' is not valid" "" { target *-*-* } .-3 } */ } diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c index 03d2bbf14a0..20dff2b6e45 100644 --- a/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c +++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c @@ -3,7 +3,7 @@ __attribute__((target ("tune=cortex-a57-typo"))) void foo () { - /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57?" 
"" { target *-*-* } .-1 } */ - /* { dg-error "unknown value 'cortex-a57-typo' for 'tune' target attribute" "" { target *-*-* } .-2 } */ - /* { dg-error "target attribute 'tune=cortex-a57-typo' is invalid" "" { target *-*-* } .-3 } */ + /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57'?" "" { target *-*-* } .-1 } */ + /* { dg-error "invalid name \\(\"cortex-a57-typo\"\\) in 'target\\(\"tune=\"\\)' pragma or attribute" "" { target *-*-* } .-2 } */ + /* { dg-error "pragma or attribute 'target\\(\"tune=cortex-a57-typo\"\\)' is not valid" "" { target *-*-* } .-3 } */ } diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c new file mode 100644 index 00000000000..2ce38483b6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +extern void arf (unsigned long int *, unsigned long int *); +void +frob () +{ + unsigned long int num[1000]; + unsigned long int den[1000]; + arf (den, num); +} + +/* This verifies that the scheduler did not break the dependencies + by adjusting the offsets within the probe and that the scheduler + did not reorder around the stack probes. */ +/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 
3 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c new file mode 100644 index 00000000000..d8886835989 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X +#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X) +void out1(ARG192(__int128)); +int t1(int); + +int t3(int x) +{ + if (x < 1000) + return t1 (x) + 1; + + out1 (ARG192(1)); + return 0; +} + + + +/* This test creates a large (> 1k) outgoing argument area that needs + to be probed. We don't test the exact size of the space or the + exact offset to make the test a little less sensitive to trivial + output changes. */ +/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c new file mode 100644 index 00000000000..59ffe01376d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +int t1(int); + +int t2(int x) +{ + char *p = __builtin_alloca (4050); + x = t1 (x); + return p[x]; +} + + +/* This test has a constant sized alloca that is smaller than the + probe interval. But it actually requires two probes instead + of one because of the optimistic assumptions we made in the + aarch64 prologue code WRT probing state. + + The form can change quite a bit so we just check for two + probes without looking at the actual address. 
*/ +/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c new file mode 100644 index 00000000000..e06db6dc2f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +int t1(int); + +int t2(int x) +{ + char *p = __builtin_alloca (x); + x = t1 (x); + return p[x]; +} + + +/* This test has a variable sized alloca. It requires 3 probes. + One in the loop, one for the residual and one at the end of the + alloca area. + + The form can change quite a bit so we just check for three + probes without looking at the actual address. */ +/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c b/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c index 95c8f696fee..2691250f79e 100644 --- a/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c +++ b/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c @@ -11,5 +11,5 @@ foo (int a, int b) return 0; } -/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */ +/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c b/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c index 60c6d9e5ccd..d343acc1195 100644 --- a/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c +++ b/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c @@ -11,5 +11,5 @@ foo (int a, int b) return 0; } -/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 } 
} */ +/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c index b3c4cb9d8a7..1a61d6a7f40 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c @@ -1,40 +1,41 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> #define DO_REGREG_OPS(TYPE, OP, NAME) \ -void varith_##TYPE##_##NAME (TYPE* dst, TYPE* src, int count) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP src[i]; \ } - #define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ -void varithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP VALUE; \ } #define DO_ARITH_OPS(TYPE, OP, NAME) \ -DO_REGREG_OPS (TYPE, OP, NAME); \ -DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ -DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ -DO_IMMEDIATE_OPS (255, TYPE, OP, NAME ## 255); \ -DO_IMMEDIATE_OPS (256, TYPE, OP, NAME ## 256); \ -DO_IMMEDIATE_OPS (257, TYPE, OP, NAME ## 257); \ -DO_IMMEDIATE_OPS (65280, TYPE, OP, NAME ## 65280); \ -DO_IMMEDIATE_OPS (65281, TYPE, OP, NAME ## 65281); \ -DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ + DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ + DO_IMMEDIATE_OPS (255, TYPE, OP, NAME ## 255); \ + DO_IMMEDIATE_OPS (256, TYPE, OP, NAME ## 256); \ + DO_IMMEDIATE_OPS (257, TYPE, OP, NAME ## 257); \ + DO_IMMEDIATE_OPS (65280, TYPE, OP, NAME ## 65280); \ + DO_IMMEDIATE_OPS (65281, TYPE, OP, NAME ## 65281); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME 
## minus1); -DO_ARITH_OPS (char, +, add) -DO_ARITH_OPS (short, +, add) -DO_ARITH_OPS (int, +, add) -DO_ARITH_OPS (long, +, add) -DO_ARITH_OPS (char, -, minus) -DO_ARITH_OPS (short, -, minus) -DO_ARITH_OPS (int, -, minus) -DO_ARITH_OPS (long, -, minus) +DO_ARITH_OPS (int8_t, +, add) +DO_ARITH_OPS (int16_t, +, add) +DO_ARITH_OPS (int32_t, +, add) +DO_ARITH_OPS (int64_t, +, add) +DO_ARITH_OPS (int8_t, -, minus) +DO_ARITH_OPS (int16_t, -, minus) +DO_ARITH_OPS (int32_t, -, minus) +DO_ARITH_OPS (int64_t, -, minus) /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ @@ -47,20 +48,21 @@ DO_ARITH_OPS (long, -, minus) /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 4 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ -/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #251\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #255\n} 4 } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 4 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 
} } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 2 } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ -/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 2 } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */ /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */ @@ -83,8 +85,8 @@ DO_ARITH_OPS (long, -, minus) /* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #5\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */ @@ -94,12 +96,11 @@ DO_ARITH_OPS (long, -, minus) /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, 
z[0-9]+\.h, #65281\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ @@ -118,4 +119,3 @@ DO_ARITH_OPS (long, -, minus) /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */ /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 1 } } */ - diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c index d97a501512b..86d3930e476 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c @@ -1,17 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -void vcvtf_32 (float *dst, signed int *src1, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vcvtf_16 (_Float16 *dst, int16_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (_Float16) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_32 (float *dst, int32_t *src1, int size) { for (int i = 0; i < size; i++) dst[i] = (float) src1[i]; } -void vcvtf_64 (double *dst, signed long *src1, int size) +void __attribute__ ((noinline, noclone)) +vcvtf_64 (double *dst, int64_t *src1, int size) { for (int i = 0; i < size; i++) dst[i] = (double) src1[i]; } +/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c index b0aa05c055e..9b431ad0ed4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c @@ -1,47 +1,47 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_cvtf_signed_1.c" #define ARRAY_SIZE 47 -#define VAL1 ((i * 3) - (15 * 3)) -#define VAL2 ((i * 0xffdfffef) - (11 * 0xffdfffef)) +#define VAL1 (i ^ 3) +#define VAL2 ((i * 3) - (15 * 3)) +#define VAL3 ((i * 0xffdfffef) - (11 * 0xffdfffef)) int __attribute__ ((optimize (1))) main (void) { - static float array_destf[ARRAY_SIZE]; - static double array_destd[ARRAY_SIZE]; + static _Float16 array_dest16[ARRAY_SIZE]; + static float array_dest32[ARRAY_SIZE]; + static double array_dest64[ARRAY_SIZE]; - signed int array_source_i[ARRAY_SIZE]; - signed long array_source_l[ARRAY_SIZE]; + int16_t array_source16[ARRAY_SIZE]; + int32_t array_source32[ARRAY_SIZE]; + int64_t array_source64[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { - array_source_i[i] = VAL1; - array_source_l[i] = VAL2; + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); } - vcvtf_32 (array_destf, array_source_i, ARRAY_SIZE); + vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (_Float16) VAL1) + __builtin_abort (); + + vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destf[i] != (float) VAL1) - { - fprintf (stderr,"%d: %f != %f\n", i, array_destf[i], (float) VAL1); - exit (1); - } + if (array_dest32[i] != (float) VAL2) + __builtin_abort (); - vcvtf_64 (array_destd, array_source_l, ARRAY_SIZE); + vcvtf_64 (array_dest64, 
array_source64, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destd[i] != (double) VAL2) - { - fprintf (stderr,"%d: %lf != %f\n", i, array_destd[i], (double) VAL2); - exit (1); - } + if (array_dest64[i] != (double) VAL3) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c index bd8cf6f6cf5..0605307d1e3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c @@ -1,17 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -void vcvtf_32 (float *dst, unsigned int *src1, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vcvtf_16 (_Float16 *dst, uint16_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (_Float16) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_32 (float *dst, uint32_t *src1, int size) { for (int i = 0; i < size; i++) dst[i] = (float) src1[i]; } -void vcvtf_64 (double *dst, unsigned long *src1, int size) +void __attribute__ ((noinline, noclone)) +vcvtf_64 (double *dst, uint64_t *src1, int size) { for (int i = 0; i < size; i++) dst[i] = (double) src1[i]; } +/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c index 5b9291ca2c2..a4434cbf478 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c @@ -1,47 +1,47 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - 
-#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_cvtf_unsigned_1.c" #define ARRAY_SIZE 65 -#define VAL1 (i * 9456) -#define VAL2 (i * 0xfddff13f) +#define VAL1 (i * 109) +#define VAL2 (i * 9456) +#define VAL3 (i * 0xfddff13f) int __attribute__ ((optimize (1))) main (void) { - static float array_destf[ARRAY_SIZE]; - static double array_destd[ARRAY_SIZE]; + static _Float16 array_dest16[ARRAY_SIZE]; + static float array_dest32[ARRAY_SIZE]; + static double array_dest64[ARRAY_SIZE]; - unsigned int array_source_i[ARRAY_SIZE]; - unsigned long array_source_l[ARRAY_SIZE]; + uint16_t array_source16[ARRAY_SIZE]; + uint32_t array_source32[ARRAY_SIZE]; + uint64_t array_source64[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { - array_source_i[i] = VAL1; - array_source_l[i] = VAL2; + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); } - vcvtf_32 (array_destf, array_source_i, ARRAY_SIZE); + vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (_Float16) VAL1) + __builtin_abort (); + + vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destf[i] != (float) VAL1) - { - fprintf (stderr,"%d: %f != %f\n", i, array_destf[i], (float) VAL1); - exit (1); - } + if (array_dest32[i] != (float) VAL2) + __builtin_abort (); - vcvtf_64 (array_destd, array_source_l, ARRAY_SIZE); + vcvtf_64 (array_dest64, array_source64, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destd[i] != (double) VAL2) - { - fprintf (stderr,"%d: %lf != %f\n", i, array_destd[i], (double) VAL2); - exit (1); - } + if (array_dest64[i] != (double) VAL3) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.C b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.c index 1f7d8a4a9ba..9fed379607b 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.c @@ -1,12 +1,14 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ +/* -fno-tree-loop-distribute-patterns prevents conversion to memset. */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) -#define DEF_SET_IMM(TYPE,IMM,SUFFIX) \ -void set_##TYPE##SUFFIX (TYPE *__restrict__ a) \ +#define DEF_SET_IMM(TYPE, IMM, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +set_##TYPE##_##SUFFIX (TYPE *a) \ { \ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ a[i] = IMM; \ @@ -93,7 +95,7 @@ DEF_SET_IMM (int64_t, 0xFE00FE00FE00FE00LL, imm_FE00_pat) // shouldn't assert! DEF_SET_IMM (int32_t, 129, imm_m129) DEF_SET_IMM (int32_t, 32513, imm_32513) -DEF_SET_IMM (int32_t, -32767, imm_m32767) +DEF_SET_IMM (int32_t, -32763, imm_m32763) /* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-1\n} } } */ @@ -130,3 +132,7 @@ DEF_SET_IMM (int32_t, -32767, imm_m32767) /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #-2\n} 2 } } */ /* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-512\n} } } */ + +/* { dg-final { scan-assembler-not {#129\n} } } */ +/* { dg-final { scan-assembler-not {#32513\n} } } */ +/* { dg-final { scan-assembler-not {#-32763\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.c index cbc16e8e2bb..237f44947ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.c @@ -1,23 +1,20 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ -#include "sve_dup_imm_1.C" 
+#include "sve_dup_imm_1.c" -#include <stdlib.h> - -#define TEST_SET_IMM(TYPE,IMM,SUFFIX) \ +#define TEST_SET_IMM(TYPE, IMM, SUFFIX) \ { \ TYPE v[NUM_ELEMS (TYPE)]; \ - set_##TYPE##SUFFIX (v); \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - if (v[i] != IMM) \ - result++; \ + set_##TYPE##_##SUFFIX (v); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (v[i] != (TYPE) IMM) \ + __builtin_abort (); \ } -int main (int argc, char **argv) +int __attribute__ ((optimize (1))) +main (int argc, char **argv) { - int result = 0; - TEST_SET_IMM (int8_t, 0, imm_0) TEST_SET_IMM (int16_t, 0, imm_0) TEST_SET_IMM (int32_t, 0, imm_0) @@ -62,15 +59,12 @@ int main (int argc, char **argv) TEST_SET_IMM (int32_t, 0x00010001, imm_0001_pat) TEST_SET_IMM (int64_t, 0x0001000100010001LL, imm_0001_pat) - TEST_SET_IMM (int16_t, int16_t (0xFEFE), imm_FE_pat) - TEST_SET_IMM (int32_t, int32_t (0xFEFEFEFE), imm_FE_pat) - TEST_SET_IMM (int64_t, int64_t (0xFEFEFEFEFEFEFEFE), imm_FE_pat) - - TEST_SET_IMM (int32_t, int32_t (0xFE00FE00), imm_FE00_pat) - TEST_SET_IMM (int64_t, int64_t (0xFE00FE00FE00FE00), imm_FE00_pat) + TEST_SET_IMM (int16_t, 0xFEFE, imm_FE_pat) + TEST_SET_IMM (int32_t, 0xFEFEFEFE, imm_FE_pat) + TEST_SET_IMM (int64_t, 0xFEFEFEFEFEFEFEFE, imm_FE_pat) - if (result != 0) - abort (); + TEST_SET_IMM (int32_t, 0xFE00FE00, imm_FE00_pat) + TEST_SET_IMM (int64_t, 0xFE00FE00FE00FE00, imm_FE00_pat) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c index d4de247b05e..ea977207226 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); 
+#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_2(X) X, X #define MASK_4(X) MASK_2 (X), MASK_2 (X) @@ -44,7 +47,10 @@ typedef float v8sf __attribute__((vector_size (32))); T (v4df, 4, 3) \ T (v8sf, 8, 0) \ T (v8sf, 8, 5) \ - T (v8sf, 8, 7) + T (v8sf, 8, 7) \ + T (v16hf, 16, 0) \ + T (v16hf, 16, 6) \ + T (v16hf, 16, 15) \ TEST_ALL (DUP_LANE) @@ -56,9 +62,9 @@ TEST_ALL (DUP_LANE) /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[0\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[5\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[7\]} 2 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 1 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[6\]} 1 } } */ -/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[6\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]} 2 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[0\]} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[19\]} 1 } } */ /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[31\]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c index 3056c60eee7..1ec51aa2eaf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_2(X) X, X + 1 #define MASK_4(X) MASK_2 (X), MASK_2 (X + 2) @@ -44,21 +47,24 @@ typedef float v8sf __attribute__((vector_size (32))); T (v4df, 4, 3) \ T (v8sf, 8, 1) \ T (v8sf, 8, 5) \ - T (v8sf, 8, 7) + T (v8sf, 8, 7) \ + T (v16hf, 16, 1) \ + T (v16hf, 16, 6) \ + T (v16hf, 16, 15) \ TEST_ALL (DUP_LANE) /* { dg-final { scan-assembler-not {\ttbl\t} } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #1\n} 1 } } */ -/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #4\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #8\n} 2 } } */ -/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #12\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #12\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #16\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, 
z[0-9]+\.b, z[0-9]+\.b, #19\n} 1 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #20\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #24\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #28\n} 2 } } */ -/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #30\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #30\n} 2 } } */ /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #31\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c new file mode 100644 index 00000000000..1ba277ffa6d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c @@ -0,0 +1,93 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v4di, 0) \ + T (int64_t, v4di, 1) \ + T (int64_t, v4di, 2) \ + T (int64_t, v4di, 3) \ + T (int32_t, v8si, 0) \ + T (int32_t, v8si, 1) \ + T (int32_t, v8si, 3) \ + T (int32_t, v8si, 4) \ + T (int32_t, v8si, 7) \ + T (int16_t, v16hi, 0) \ + T (int16_t, v16hi, 1) \ + T (int16_t, v16hi, 7) \ + T (int16_t, v16hi, 8) \ + T (int16_t, v16hi, 15) \ + T (int8_t, v32qi, 0) \ + T (int8_t, 
v32qi, 1) \ + T (int8_t, v32qi, 15) \ + T (int8_t, v32qi, 16) \ + T (int8_t, v32qi, 31) \ + T (double, v4df, 0) \ + T (double, v4df, 1) \ + T (double, v4df, 2) \ + T (double, v4df, 3) \ + T (float, v8sf, 0) \ + T (float, v8sf, 1) \ + T (float, v8sf, 3) \ + T (float, v8sf, 4) \ + T (float, v8sf, 7) \ + T (_Float16, v16hf, 0) \ + T (_Float16, v16hf, 1) \ + T (_Float16, v16hf, 7) \ + T (_Float16, v16hf, 8) \ + T (_Float16, v16hf, 15) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c new file mode 100644 index 00000000000..b163f28ef28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c @@ -0,0 +1,93 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=512 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v8di __attribute__((vector_size (64))); +typedef int32_t v16si __attribute__((vector_size (64))); +typedef int16_t v32hi __attribute__((vector_size (64))); +typedef int8_t v64qi __attribute__((vector_size (64))); +typedef double v8df __attribute__((vector_size (64))); +typedef float v16sf __attribute__((vector_size (64))); +typedef 
_Float16 v32hf __attribute__((vector_size (64))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v8di, 0) \ + T (int64_t, v8di, 1) \ + T (int64_t, v8di, 2) \ + T (int64_t, v8di, 7) \ + T (int32_t, v16si, 0) \ + T (int32_t, v16si, 1) \ + T (int32_t, v16si, 3) \ + T (int32_t, v16si, 4) \ + T (int32_t, v16si, 15) \ + T (int16_t, v32hi, 0) \ + T (int16_t, v32hi, 1) \ + T (int16_t, v32hi, 7) \ + T (int16_t, v32hi, 8) \ + T (int16_t, v32hi, 31) \ + T (int8_t, v64qi, 0) \ + T (int8_t, v64qi, 1) \ + T (int8_t, v64qi, 15) \ + T (int8_t, v64qi, 16) \ + T (int8_t, v64qi, 63) \ + T (double, v8df, 0) \ + T (double, v8df, 1) \ + T (double, v8df, 2) \ + T (double, v8df, 7) \ + T (float, v16sf, 0) \ + T (float, v16sf, 1) \ + T (float, v16sf, 3) \ + T (float, v16sf, 4) \ + T (float, v16sf, 15) \ + T (_Float16, v32hf, 0) \ + T (_Float16, v32hf, 1) \ + T (_Float16, v32hf, 7) \ + T (_Float16, v32hf, 8) \ + T (_Float16, v32hf, 31) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not 
{\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c new file mode 100644 index 00000000000..87ac2351768 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c @@ -0,0 +1,124 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=1024 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v16di __attribute__((vector_size (128))); +typedef int32_t v32si __attribute__((vector_size (128))); +typedef int16_t v64hi __attribute__((vector_size (128))); +typedef int8_t v128qi __attribute__((vector_size (128))); +typedef double v16df __attribute__((vector_size (128))); +typedef float v32sf __attribute__((vector_size (128))); +typedef _Float16 v64hf __attribute__((vector_size (128))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v16di, 0) \ + T (int64_t, v16di, 1) \ + T (int64_t, v16di, 2) \ + T (int64_t, v16di, 7) \ + T (int64_t, v16di, 8) \ + T (int64_t, v16di, 9) \ + T (int64_t, v16di, 15) \ + T (int32_t, v32si, 0) \ + T (int32_t, v32si, 1) \ + T (int32_t, v32si, 3) \ + T (int32_t, v32si, 4) \ + T (int32_t, v32si, 15) \ + T (int32_t, v32si, 16) \ + T (int32_t, v32si, 21) \ + T (int32_t, v32si, 31) \ + T (int16_t, v64hi, 0) \ + T (int16_t, v64hi, 1) \ + T (int16_t, v64hi, 7) \ + T (int16_t, v64hi, 8) \ + T (int16_t, v64hi, 31) \ + T (int16_t, v64hi, 32) \ + T (int16_t, v64hi, 
47) \ + T (int16_t, v64hi, 63) \ + T (int8_t, v128qi, 0) \ + T (int8_t, v128qi, 1) \ + T (int8_t, v128qi, 15) \ + T (int8_t, v128qi, 16) \ + T (int8_t, v128qi, 63) \ + T (int8_t, v128qi, 64) \ + T (int8_t, v128qi, 100) \ + T (int8_t, v128qi, 127) \ + T (double, v16df, 0) \ + T (double, v16df, 1) \ + T (double, v16df, 2) \ + T (double, v16df, 7) \ + T (double, v16df, 8) \ + T (double, v16df, 9) \ + T (double, v16df, 15) \ + T (float, v32sf, 0) \ + T (float, v32sf, 1) \ + T (float, v32sf, 3) \ + T (float, v32sf, 4) \ + T (float, v32sf, 15) \ + T (float, v32sf, 16) \ + T (float, v32sf, 21) \ + T (float, v32sf, 31) \ + T (_Float16, v64hf, 0) \ + T (_Float16, v64hf, 1) \ + T (_Float16, v64hf, 7) \ + T (_Float16, v64hf, 8) \ + T (_Float16, v64hf, 31) \ + T (_Float16, v64hf, 32) \ + T (_Float16, v64hf, 47) \ + T (_Float16, v64hf, 63) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c new file mode 100644 index 00000000000..e61a2fa94e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c @@ -0,0 +1,135 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=2048 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v32di __attribute__((vector_size (256))); +typedef int32_t v64si __attribute__((vector_size (256))); +typedef int16_t v128hi __attribute__((vector_size (256))); +typedef int8_t v256qi __attribute__((vector_size (256))); +typedef double v32df __attribute__((vector_size (256))); +typedef float v64sf __attribute__((vector_size (256))); +typedef _Float16 v128hf __attribute__((vector_size (256))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define 
TEST_ALL(T) \ + T (int64_t, v32di, 0) \ + T (int64_t, v32di, 1) \ + T (int64_t, v32di, 2) \ + T (int64_t, v32di, 7) \ + T (int64_t, v32di, 8) \ + T (int64_t, v32di, 9) \ + T (int64_t, v32di, 15) \ + T (int64_t, v32di, 31) \ + T (int32_t, v64si, 0) \ + T (int32_t, v64si, 1) \ + T (int32_t, v64si, 3) \ + T (int32_t, v64si, 4) \ + T (int32_t, v64si, 15) \ + T (int32_t, v64si, 16) \ + T (int32_t, v64si, 21) \ + T (int32_t, v64si, 31) \ + T (int32_t, v64si, 63) \ + T (int16_t, v128hi, 0) \ + T (int16_t, v128hi, 1) \ + T (int16_t, v128hi, 7) \ + T (int16_t, v128hi, 8) \ + T (int16_t, v128hi, 31) \ + T (int16_t, v128hi, 32) \ + T (int16_t, v128hi, 47) \ + T (int16_t, v128hi, 63) \ + T (int16_t, v128hi, 127) \ + T (int8_t, v256qi, 0) \ + T (int8_t, v256qi, 1) \ + T (int8_t, v256qi, 15) \ + T (int8_t, v256qi, 16) \ + T (int8_t, v256qi, 63) \ + T (int8_t, v256qi, 64) \ + T (int8_t, v256qi, 100) \ + T (int8_t, v256qi, 127) \ + T (int8_t, v256qi, 255) \ + T (double, v32df, 0) \ + T (double, v32df, 1) \ + T (double, v32df, 2) \ + T (double, v32df, 7) \ + T (double, v32df, 8) \ + T (double, v32df, 9) \ + T (double, v32df, 15) \ + T (double, v32df, 31) \ + T (float, v64sf, 0) \ + T (float, v64sf, 1) \ + T (float, v64sf, 3) \ + T (float, v64sf, 4) \ + T (float, v64sf, 15) \ + T (float, v64sf, 16) \ + T (float, v64sf, 21) \ + T (float, v64sf, 31) \ + T (float, v64sf, 63) \ + T (_Float16, v128hf, 0) \ + T (_Float16, v128hf, 1) \ + T (_Float16, v128hf, 7) \ + T (_Float16, v128hf, 8) \ + T (_Float16, v128hf, 31) \ + T (_Float16, v128hf, 32) \ + T (_Float16, v128hf, 47) \ + T (_Float16, v128hf, 63) \ + T (_Float16, v128hf, 127) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { 
dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, 
z[0-9]+\.b, #120\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #124\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #127\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c index 61ec667363a..33e1db5d1df 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c @@ -9,9 +9,10 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - +DO_OPS (_Float16, fabsf) DO_OPS (float, fabsf) DO_OPS (double, fabs) +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c index 8fd41db0a1f..7c5f6ddc996 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c @@ -1,17 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -void vfcvtz_32 (signed int *dst, float *src1, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vfcvtz_16 (int16_t *dst, _Float16 *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (int16_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_32 (int32_t *dst, float *src1, int size) { for (int i = 0; i < size; i++) - dst[i] = (signed int) src1[i]; + dst[i] = (int32_t) src1[i]; } -void vfcvtz_64 (signed long *dst, double *src1, int size) +void __attribute__ ((noinline, noclone)) +vfcvtz_64 (int64_t *dst, double *src1, int size) { for 
(int i = 0; i < size; i++) - dst[i] = (signed long) src1[i]; + dst[i] = (int64_t) src1[i]; } +/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c index 58ae7737a89..48968f8ce19 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c @@ -1,47 +1,47 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O3 -march=armv8-a+sve" } */ #include "sve_fcvtz_signed_1.c" #define ARRAY_SIZE 81 -#define VAL1 ((i * 237.86) - (29 * 237.86)) -#define VAL2 ((double) ((i * 0xf8dfef2f) - (11 * 0xf8dfef2f))) +#define VAL1 ((i * 17) - 180) +#define VAL2 ((i * 237.86) - (29 * 237.86)) +#define VAL3 ((double) ((i * 0xf8dfef2f) - (11 * 0xf8dfef2f))) int __attribute__ ((optimize (1))) main (void) { - static signed int array_desti[ARRAY_SIZE]; - static signed long array_destl[ARRAY_SIZE]; + static int16_t array_dest16[ARRAY_SIZE]; + static int32_t array_dest32[ARRAY_SIZE]; + static int64_t array_dest64[ARRAY_SIZE]; - float array_source_f[ARRAY_SIZE]; - double array_source_d[ARRAY_SIZE]; + _Float16 array_source16[ARRAY_SIZE]; + float array_source32[ARRAY_SIZE]; + double array_source64[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { - array_source_f[i] = VAL1; - array_source_d[i] = VAL2; + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); } - vfcvtz_32 (array_desti, array_source_f, ARRAY_SIZE); + vfcvtz_16 (array_dest16, array_source16, 
ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (int16_t) VAL1) + __builtin_abort (); + + vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_desti[i] != (int) VAL1) - { - fprintf (stderr,"%d: %d != %d\n", i, array_desti[i], (int) VAL1); - exit (1); - } + if (array_dest32[i] != (int32_t) VAL2) + __builtin_abort (); - vfcvtz_64 (array_destl, array_source_d, ARRAY_SIZE); + vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destl[i] != (long) VAL2) - { - fprintf (stderr,"%d: %ld != %ld\n", i, array_destl[i], (long) VAL2); - exit (1); - } + if (array_dest64[i] != (int64_t) VAL3) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c index b4dcd26cfd0..2691cf0bc17 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c @@ -1,17 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -void vfcvtz_32 (unsigned int *dst, float *src1, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vfcvtz_16 (uint16_t *dst, _Float16 *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (uint16_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_32 (uint32_t *dst, float *src1, int size) { for (int i = 0; i < size; i++) - dst[i] = (unsigned int) src1[i]; + dst[i] = (uint32_t) src1[i]; } -void vfcvtz_64 (unsigned long *dst, double *src1, int size) +void __attribute__ ((noinline, noclone)) +vfcvtz_64 (uint64_t *dst, double *src1, int size) { for (int i = 0; i < size; i++) - dst[i] = (unsigned long) src1[i]; + dst[i] = (uint64_t) src1[i]; } +/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, 
p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c index e196d174c66..9c1be7c8a6f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c @@ -1,47 +1,47 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_fcvtz_unsigned_1.c" #define ARRAY_SIZE 75 -#define VAL1 (i * 2574.33) -#define VAL2 ((double) (i * 0xff23efef)) +#define VAL1 (i * 19) +#define VAL2 (i * 2574.33) +#define VAL3 ((double) (i * 0xff23efef)) int __attribute__ ((optimize (1))) main (void) { - static unsigned int array_desti[ARRAY_SIZE]; - static unsigned long array_destl[ARRAY_SIZE]; + static uint16_t array_dest16[ARRAY_SIZE]; + static uint32_t array_dest32[ARRAY_SIZE]; + static uint64_t array_dest64[ARRAY_SIZE]; - float array_source_f[ARRAY_SIZE]; - double array_source_d[ARRAY_SIZE]; + _Float16 array_source16[ARRAY_SIZE]; + float array_source32[ARRAY_SIZE]; + double array_source64[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) { - array_source_f[i] = VAL1; - array_source_d[i] = VAL2; + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); } - vfcvtz_32 (array_desti, array_source_f, ARRAY_SIZE); + vfcvtz_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (uint16_t) VAL1) + __builtin_abort (); + + vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_desti[i] != (int) VAL1) - { - fprintf (stderr,"%d: %d != %d\n", i, array_desti[i], 
(int) VAL1); - exit (1); - } + if (array_dest32[i] != (uint32_t) VAL2) + __builtin_abort (); - vfcvtz_64 (array_destl, array_source_d, ARRAY_SIZE); + vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_destl[i] != (long) VAL2) - { - fprintf (stderr,"%d: %ld != %ld\n", i, array_destl[i], (long) VAL2); - exit (1); - } + if (array_dest64[i] != (uint64_t) VAL3) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c index d0becaf25f1..b193726ea0a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c @@ -1,30 +1,41 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vdiv##TYPE (TYPE *dst, TYPE src1) \ -{ \ - *dst = *dst / src1; \ -} \ -void vdivr##TYPE (TYPE *_dst, TYPE _src1) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - dst = src1 / dst; \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vdiv_##TYPE (TYPE *x, TYPE y) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src asm("z2"); \ + dst = *x; \ + src = y; \ + asm volatile ("" :: "w" (dst), "w" (src)); \ + dst = dst / src; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} \ +void vdivr_##TYPE (TYPE *x, TYPE y) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src asm("z2"); \ + dst = *x; \ + src = y; \ + asm volatile ("" :: "w" (dst), "w" (src)); \ + dst = src / dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) +/* { dg-final { scan-assembler-times 
{\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c index 9ed825b9d35..148e0f9bd89 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c @@ -1,22 +1,24 @@ -/* { dg-do compile } */ -/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* -fno-tree-loop-distribute-patterns prevents conversion to memset. */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns --save-temps" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) -#define DEF_SET_IMM(TYPE,IMM,SUFFIX) \ -void set_##TYPE##SUFFIX (TYPE *restrict a) \ +#define DEF_SET_IMM(TYPE, IMM, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +set_##TYPE##_##SUFFIX (TYPE *a) \ { \ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ a[i] = IMM; \ } #define DEF_SET_IMM_FP(IMM, SUFFIX) \ -DEF_SET_IMM (float, IMM, SUFFIX) \ -DEF_SET_IMM (double, IMM, SUFFIX) + DEF_SET_IMM (float, IMM, SUFFIX) \ + DEF_SET_IMM (double, IMM, SUFFIX) -//Valid +/* Valid. */ DEF_SET_IMM_FP (1, imm1) DEF_SET_IMM_FP (0x1.1p0, imm1p0) DEF_SET_IMM_FP (0x1.fp0, immfp0) @@ -25,8 +27,10 @@ DEF_SET_IMM_FP (0x1.1p-3, imm1pm3) DEF_SET_IMM_FP (0x1.fp4, immfp4) DEF_SET_IMM_FP (0x1.fp-3, immfpm3) -//Invalid +/* Should use MOV instead. */ DEF_SET_IMM_FP (0, imm0) + +/* Invalid. 
*/ DEF_SET_IMM_FP (0x1.1fp0, imm1fp0) DEF_SET_IMM_FP (0x1.1p5, imm1p5) DEF_SET_IMM_FP (0x1.1p-4, imm1pm4) @@ -43,6 +47,8 @@ DEF_SET_IMM_FP (0x1.1fp-4, imm1fpm4) /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #3.1e\+1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2.421875e-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #0\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d,} 7 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.0e\+0\n} 1 } } */ @@ -52,3 +58,5 @@ DEF_SET_IMM_FP (0x1.1fp-4, imm1fpm4) /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.328125e-1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3.1e\+1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2.421875e-1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c index bcd5180bbc6..f4cb1a0bf71 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c @@ -1,28 +1,24 @@ /* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ #include "sve_fdup_1.c" -#include <stdlib.h> - #define TEST_SET_IMM(TYPE,IMM,SUFFIX) \ { \ TYPE v[NUM_ELEMS (TYPE)]; \ - set_##TYPE##SUFFIX (v); \ + set_##TYPE##_##SUFFIX (v); \ for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ if (v[i] != IMM) \ - result++; \ + __builtin_abort (); \ } #define TEST_SET_IMM_FP(IMM, SUFFIX) \ -TEST_SET_IMM (float, IMM, SUFFIX) \ -TEST_SET_IMM (double, IMM, SUFFIX) - + TEST_SET_IMM (float, IMM, SUFFIX) \ + TEST_SET_IMM (double, IMM, SUFFIX) -int main (int argc, char **argv) +int __attribute__ ((optimize (1))) +main (int argc, char **argv) { - int result = 0; - 
TEST_SET_IMM_FP (1, imm1) TEST_SET_IMM_FP (0x1.1p0, imm1p0) TEST_SET_IMM_FP (0x1.fp0, immfp0) @@ -38,8 +34,5 @@ int main (int argc, char **argv) TEST_SET_IMM_FP (0x1.1fp5, imm1fp5) TEST_SET_IMM_FP (0x1.1fp-4, imm1fpm4) - if (result != 0) - abort (); - return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c index 75e39c4e3e4..2b1dbb087bc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vmad##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (dst * src1) + src2; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfmad\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmad\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final 
{ scan-assembler-times {\tfmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c index 657773eada1..d5e4df266bf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vmla##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (src1 * src2) + dst; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfmla\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmla\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c index 
5aca3b145a9..c3f2c8a5823 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vmls##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-src1 * src2) + dst; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfmls\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmls\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c index 5f4143fc5da..30e1895c8d5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { 
dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vmsb##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-dst * src1) + src2; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfmsb\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmsb\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c index f4fb574beac..3b648297963 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c @@ -1,31 +1,38 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ #define DO_REGREG_OPS(TYPE, OP, NAME) \ -void 
varith_##TYPE##_##NAME (TYPE* dst, TYPE* src, int count) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP src[i]; \ } #define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ -void varithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] OP VALUE; \ + dst[i] = dst[i] OP (TYPE) VALUE; \ } #define DO_ARITH_OPS(TYPE, OP, NAME) \ -DO_REGREG_OPS (TYPE, OP, NAME); \ -DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## 0point5); \ -DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ -DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ -DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minus0point5); \ -DO_IMMEDIATE_OPS (-2, TYPE, OP, NAME ## minus2); + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## 0point5); \ + DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ + DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ + DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minus0point5); \ + DO_IMMEDIATE_OPS (-2, TYPE, OP, NAME ## minus2); +DO_ARITH_OPS (_Float16, *, mul) DO_ARITH_OPS (float, *, mul) DO_ARITH_OPS (double, *, mul) +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #5} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ /* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c index ddb703bf875..7af81662fb9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c @@ -1,15 +1,17 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ #define DO_OPS(TYPE) \ -void vneg_##TYPE (TYPE* dst, TYPE* src, int count) \ +void vneg_##TYPE (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = -src[i]; \ } +DO_OPS (_Float16) DO_OPS (float) DO_OPS (double) +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c index 877261029db..84a95187314 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vfnmad##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-src2) + (-dst * src1); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + 
register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-dst * src1) - src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfnmad\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfnmad\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c index 463c90550d6..dcc4811f1d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vfnmla##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-dst) + (-src1 * src2); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-src1 * src2) - dst; \ + asm 
volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfnmla\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfnmla\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c index 312600fea20..7a89399f4be 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vfnmls##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-dst) + (src1 * src2); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (src1 * src2) - dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfnmls\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ 
-/* { dg-final { scan-assembler-times {\tfnmls\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c index 71e36b0028f..6c95b0abc8e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c @@ -1,28 +1,29 @@ /* { dg-do assemble } */ /* { dg-options " -O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ +typedef _Float16 v16hf __attribute__((vector_size(32))); typedef float v8sf __attribute__((vector_size(32))); typedef double v4df __attribute__((vector_size(32))); -#define DO_OP(TYPE) \ -void vfnmsb##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (-src2) + (dst * src1); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) - src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } +DO_OP (v16hf) DO_OP (v8sf) DO_OP (v4df) -/* { dg-final { scan-assembler-times {\tfnmsb\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfnmsb\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } 
*/ +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c index a4c09074d43..06fea806038 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c @@ -1,36 +1,51 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ #define DO_REGREG_OPS(TYPE, OP, NAME) \ -void varith_##TYPE##_##NAME (TYPE* dst, TYPE* src, int count) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP src[i]; \ } #define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ - void varithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] OP VALUE; \ + dst[i] = dst[i] OP (TYPE) VALUE; \ } #define DO_ARITH_OPS(TYPE, OP, NAME) \ -DO_REGREG_OPS (TYPE, OP, NAME); \ -DO_IMMEDIATE_OPS (1, TYPE, OP, NAME ## 1); \ -DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## pointfive); \ -DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ -DO_IMMEDIATE_OPS (2.5, TYPE, OP, NAME ## twopoint5); \ -DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minuspointfive); \ -DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (1, TYPE, OP, NAME ## 1); \ + DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## pointfive); \ + DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ + DO_IMMEDIATE_OPS (2.5, TYPE, OP, NAME ## twopoint5); \ + DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minuspointfive); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); +DO_ARITH_OPS (_Float16, +, add) DO_ARITH_OPS (float, +, add) DO_ARITH_OPS (double, +, add) + +DO_ARITH_OPS (_Float16, 
-, minus) DO_ARITH_OPS (float, -, minus) DO_ARITH_OPS (double, -, minus) /* No specific count because it's valid to use fadd or fsub for the out-of-range constants. */ +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + /* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c index 37f26fc8203..bad2be4ed33 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, roundf) DO_OPS (double, round) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c index 9faf2a3f81b..4407fb56caa 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, nearbyintf) DO_OPS (double, nearbyint) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c index b59d21ff0c7..01bf65db343 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, floorf) DO_OPS (double, floor) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c index d9a55e3ade5..f8b2c08ac63 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, ceilf) DO_OPS (double, ceil) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c index 012d9cb9de5..a062295011a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, rintf) DO_OPS (double, rint) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c index 2ae8f0026a7..207814f5506 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, truncf) DO_OPS (double, trunc) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c index 224c6ccfe6f..55081c3bf4f 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c @@ -9,7 +9,6 @@ vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ dst[i] = __builtin_##OP (src[i]); \ } - DO_OPS (float, sqrtf) DO_OPS (double, sqrt) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c index e664bf38c29..b252ef059ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c @@ -1,23 +1,30 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ #define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ -void vsubrarithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void vsubrarithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ - dst[i] = VALUE - dst[i]; \ + dst[i] = (TYPE) VALUE - dst[i]; \ } #define DO_ARITH_OPS(TYPE) \ -DO_IMMEDIATE_OPS (0, TYPE, 0); \ -DO_IMMEDIATE_OPS (1, TYPE, 1); \ -DO_IMMEDIATE_OPS (0.5, TYPE, 0point5); \ -DO_IMMEDIATE_OPS (2, TYPE, 2); \ -DO_IMMEDIATE_OPS (3.5, TYPE, 3point5); + DO_IMMEDIATE_OPS (0, TYPE, 0); \ + DO_IMMEDIATE_OPS (1, TYPE, 1); \ + DO_IMMEDIATE_OPS (0.5, TYPE, 0point5); \ + DO_IMMEDIATE_OPS (2, TYPE, 2); \ + DO_IMMEDIATE_OPS (3.5, TYPE, 3point5); +DO_ARITH_OPS (_Float16) DO_ARITH_OPS (float) DO_ARITH_OPS (double) +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #3} } } */ + /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ /* { 
dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_1.C b/gcc/testsuite/gcc.target/aarch64/sve_index_1.c index b7ae2d19f1e..09e65cf0fc3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_index_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_1.c @@ -1,50 +1,57 @@ -/* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256" } */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) -#define DEF_LOOP(TYPE,BASE,STEP,SUFFIX) \ -void loop_##TYPE##SUFFIX (TYPE *__restrict__ a) \ +#define DEF_LOOP(TYPE, BASE, STEP, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +loop_##TYPE##_##SUFFIX (TYPE *a) \ { \ - for (TYPE i = 0; i < NUM_ELEMS (TYPE); ++i) \ - a[i] = TYPE (BASE) + TYPE (i * (STEP)); \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + a[i] = (BASE) + i * (STEP); \ } -#define DEF_LOOPS_ALL_UNSIGNED_TYPES(BASE,STEP,SUFFIX) \ -DEF_LOOP (uint8_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (uint16_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (uint32_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (uint64_t, BASE, STEP, SUFFIX) +#define TEST_ALL_UNSIGNED_TYPES(T, BASE, STEP, SUFFIX) \ + T (uint8_t, BASE, STEP, SUFFIX) \ + T (uint16_t, BASE, STEP, SUFFIX) \ + T (uint32_t, BASE, STEP, SUFFIX) \ + T (uint64_t, BASE, STEP, SUFFIX) -#define DEF_LOOPS_ALL_SIGNED_TYPES(BASE,STEP,SUFFIX) \ -DEF_LOOP (int8_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (int16_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (int32_t, BASE, STEP, SUFFIX) \ -DEF_LOOP (int64_t, BASE, STEP, SUFFIX) +#define TEST_ALL_SIGNED_TYPES(T, BASE, STEP, SUFFIX) \ + T (int8_t, BASE, STEP, SUFFIX) \ + T (int16_t, BASE, STEP, SUFFIX) \ + T (int32_t, BASE, STEP, SUFFIX) \ + 
T (int64_t, BASE, STEP, SUFFIX) -/* Immediate Loops. */ -DEF_LOOPS_ALL_UNSIGNED_TYPES (0, 1, b0s1) -DEF_LOOPS_ALL_SIGNED_TYPES (0, 1, b0s1) -DEF_LOOPS_ALL_UNSIGNED_TYPES (0, 15, b0s15) -DEF_LOOPS_ALL_SIGNED_TYPES (0, 15, b0s15) -DEF_LOOPS_ALL_SIGNED_TYPES (0, -1, b0sm1) -DEF_LOOPS_ALL_SIGNED_TYPES (0, -16, b0sm16) -DEF_LOOPS_ALL_SIGNED_TYPES (-16, 1, bm16s1) -DEF_LOOPS_ALL_UNSIGNED_TYPES (15, 1, b15s1) -DEF_LOOPS_ALL_SIGNED_TYPES (15, 1, b15s1) +/* Immediate loops. */ +#define TEST_IMMEDIATE(T) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 1, b0s1) \ + TEST_ALL_SIGNED_TYPES (T, 0, 1, b0s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 15, b0s15) \ + TEST_ALL_SIGNED_TYPES (T, 0, 15, b0s15) \ + TEST_ALL_SIGNED_TYPES (T, 0, -1, b0sm1) \ + TEST_ALL_SIGNED_TYPES (T, 0, -16, b0sm16) \ + TEST_ALL_SIGNED_TYPES (T, -16, 1, bm16s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 15, 1, b15s1) \ + TEST_ALL_SIGNED_TYPES (T, 15, 1, b15s1) -/* Non Immediate Loops. */ -DEF_LOOPS_ALL_UNSIGNED_TYPES (0, 16, b0s16) -DEF_LOOPS_ALL_SIGNED_TYPES (0, 16, b0s16) -DEF_LOOPS_ALL_SIGNED_TYPES (0, -17, b0sm17) -DEF_LOOPS_ALL_SIGNED_TYPES (-17, 1, bm17s1) -DEF_LOOPS_ALL_UNSIGNED_TYPES (16, 1, b16s1) -DEF_LOOPS_ALL_SIGNED_TYPES (16, 1, b16s1) -DEF_LOOPS_ALL_UNSIGNED_TYPES (16, 16, b16s16) -DEF_LOOPS_ALL_SIGNED_TYPES (16, 16, b16s16) -DEF_LOOPS_ALL_SIGNED_TYPES (-17, -17, bm17sm17) +/* Non-immediate loops. 
*/ +#define TEST_NONIMMEDIATE(T) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 16, b0s16) \ + TEST_ALL_SIGNED_TYPES (T, 0, 16, b0s16) \ + TEST_ALL_SIGNED_TYPES (T, 0, -17, b0sm17) \ + TEST_ALL_SIGNED_TYPES (T, -17, 1, bm17s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 16, 1, b16s1) \ + TEST_ALL_SIGNED_TYPES (T, 16, 1, b16s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 16, 16, b16s16) \ + TEST_ALL_SIGNED_TYPES (T, 16, 16, b16s16) \ + TEST_ALL_SIGNED_TYPES (T, -17, -17, bm17sm17) + +#define TEST_ALL(T) TEST_IMMEDIATE (T) TEST_NONIMMEDIATE (T) + +TEST_ALL (DEF_LOOP) /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #1\n} 2 } } */ /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #15\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.C deleted file mode 100644 index 0698eaba6eb..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.C +++ /dev/null @@ -1,79 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include "sve_index_1.C" - -#include <stdlib.h> -#include <stdio.h> - -#define SUM_VECTOR(TYPE) \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - { \ - result += r_##TYPE[i]; \ - } - -#define TEST_LOOPS_ALL_UNSIGNED_TYPES(SUFFIX) \ -loop_uint8_t##SUFFIX (r_uint8_t); \ -loop_uint16_t##SUFFIX (r_uint16_t); \ -loop_uint32_t##SUFFIX (r_uint32_t); \ -loop_uint64_t##SUFFIX (r_uint64_t); \ -SUM_VECTOR (uint8_t); \ -SUM_VECTOR (uint16_t); \ -SUM_VECTOR (uint32_t); \ -SUM_VECTOR (uint64_t); - -#define TEST_LOOPS_ALL_SIGNED_TYPES(SUFFIX) \ -loop_int8_t##SUFFIX (r_int8_t); \ -loop_int16_t##SUFFIX (r_int16_t); \ -loop_int32_t##SUFFIX (r_int32_t); \ -loop_int64_t##SUFFIX (r_int64_t); \ -SUM_VECTOR (int8_t); \ -SUM_VECTOR (int16_t); \ -SUM_VECTOR (int32_t); \ -SUM_VECTOR (int64_t); - - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - 
r_##TYPE[i] = 0; - -int main () -{ - int result = 0; - DEF_INIT_VECTOR (int8_t) - DEF_INIT_VECTOR (int16_t) - DEF_INIT_VECTOR (int32_t) - DEF_INIT_VECTOR (int64_t) - DEF_INIT_VECTOR (uint8_t) - DEF_INIT_VECTOR (uint16_t) - DEF_INIT_VECTOR (uint32_t) - DEF_INIT_VECTOR (uint64_t) - - TEST_LOOPS_ALL_UNSIGNED_TYPES (b0s1) - TEST_LOOPS_ALL_SIGNED_TYPES (b0s1) - TEST_LOOPS_ALL_UNSIGNED_TYPES (b0s15) - TEST_LOOPS_ALL_SIGNED_TYPES (b0s15) - TEST_LOOPS_ALL_SIGNED_TYPES (b0sm1) - TEST_LOOPS_ALL_SIGNED_TYPES (b0sm16) - TEST_LOOPS_ALL_SIGNED_TYPES (bm16s1) - TEST_LOOPS_ALL_UNSIGNED_TYPES (b15s1) - TEST_LOOPS_ALL_SIGNED_TYPES (b15s1) - - TEST_LOOPS_ALL_UNSIGNED_TYPES (b0s16) - TEST_LOOPS_ALL_SIGNED_TYPES (b0s16) - TEST_LOOPS_ALL_SIGNED_TYPES (b0sm17) - TEST_LOOPS_ALL_SIGNED_TYPES (bm17s1) - TEST_LOOPS_ALL_UNSIGNED_TYPES (b16s1) - TEST_LOOPS_ALL_SIGNED_TYPES (b16s1) - TEST_LOOPS_ALL_UNSIGNED_TYPES (b16s16) - TEST_LOOPS_ALL_SIGNED_TYPES (b16s16) - TEST_LOOPS_ALL_SIGNED_TYPES (bm17sm17) - - if (result != 24270) - { - fprintf (stderr, "result = %d\n", result); - abort (); - } - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c new file mode 100644 index 00000000000..7492ed3f756 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c @@ -0,0 +1,20 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include "sve_index_1.c" + +#define TEST_LOOP(TYPE, BASE, STEP, SUFFIX) \ + { \ + TYPE array[NUM_ELEMS (TYPE)] = {}; \ + loop_##TYPE##_##SUFFIX (array); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (array[i] != (TYPE) (BASE + i * STEP)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffsetlarge_1.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffsetlarge_1.c deleted file mode 
100644 index 4c9aab4aada..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_indexoffsetlarge_1.c +++ /dev/null @@ -1,31 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve --save-temps" } */ - -//Test sizes that are too big for an index register -#define SIZE 4294967297 - -#define INDEX_OFFSET_TEST(SIGNED, TYPE)\ -void set_##SIGNED##TYPE (SIGNED TYPE *out, SIGNED TYPE *in)\ -{\ - unsigned long i;\ - for (i = 0; i < SIZE; i++)\ - {\ - out[i] = in[i];\ - }\ -} - -INDEX_OFFSET_TEST (signed, int) -INDEX_OFFSET_TEST (unsigned, int) -INDEX_OFFSET_TEST (signed, short) -INDEX_OFFSET_TEST (unsigned, short) -INDEX_OFFSET_TEST (signed, char) -INDEX_OFFSET_TEST (unsigned, char) - -/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 3\\\]" } } */ -/* { dg-final { scan-assembler-not "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 3\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 2\\\]" } } */ -/* { dg-final { scan-assembler-not "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 2\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, w\[0-9\]+, .xtw 1\\\]" } } */ -/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, w\[0-9\]+, .xtw\\\]" } } */ -/* { dg-final { scan-assembler-not "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, w\[0-9\]+, .xtw\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_infloop_1.c b/gcc/testsuite/gcc.target/aarch64/sve_infloop_1.c deleted file mode 100644 index 11681c05409..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_infloop_1.c +++ /dev/null @@ -1,64 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-O2 -ftree-vectorize 
-fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ -/* { dg-timeout 60 } */ - -#include <stdint.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <limits.h> - -/* Make sure that in cases where - n <= TYPE_MAX && n > (TYPE_MAX + 1 - (sizeof (SVE_VEC) / sizeof (TYPE))) - that we don't iterate more times than we should due to overflow in the last - iteration of loop. If n == TYPE_MAX we could spin forever. */ - -#define SIMPLE_LOOP(TYPE) \ -TYPE foo_##TYPE (TYPE n, TYPE * __restrict__ a) \ -{ \ - TYPE i; \ - TYPE v = 0; \ - for (i = 0; i < n; i++) \ - v += a[i]; \ - return v; \ -} - -SIMPLE_LOOP (uint8_t) -SIMPLE_LOOP (uint16_t) - -/* Minimum architected SVE vector = 128 bits, i.e. 16 bytes. Just choose - something that meets the critera shown above. */ -#define N_uint8_t (UCHAR_MAX - 1) -#define N_uint16_t (USHRT_MAX - 1) - -#define N_MAX 1024 -#define DEF_VAR(TYPE) \ - TYPE *a_##TYPE = (TYPE *) malloc (N_##TYPE * sizeof (TYPE)); \ - for (i = 0; i < N_##TYPE; i++) \ - a_##TYPE[i] = 1; \ - TYPE r_##TYPE; - -#define TEST_SIMPLE_LOOP(TYPE) r_##TYPE = foo_##TYPE (N_##TYPE, a_##TYPE); - -#define VERIFY(TYPE) \ - if (r_##TYPE != N_##TYPE) \ - { \ - fprintf (stderr, "r_" #TYPE " = %ld\n", (uint64_t) r_##TYPE); \ - abort (); \ - } - -int main () -{ - int i; - DEF_VAR (uint8_t) - DEF_VAR (uint16_t) - - /* We only test 8 and 16 bit as others take too long. 
*/ - TEST_SIMPLE_LOOP (uint8_t) - TEST_SIMPLE_LOOP (uint16_t) - - VERIFY (uint8_t) - VERIFY (uint16_t) - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c new file mode 100644 index 00000000000..314c2b89624 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c @@ -0,0 +1,53 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +#define DUP4(X) X, X, X, X +#define DUP8(X) DUP4 (X), DUP4 (X) +#define DUP16(X) DUP8 (X), DUP8 (X) +#define DUP32(X) DUP16 (X), DUP16 (X) + +typedef uint8_t vuint8_t __attribute__ ((vector_size (32))); +typedef uint16_t vuint16_t __attribute__ ((vector_size (32))); +typedef uint32_t vuint32_t __attribute__ ((vector_size (32))); +typedef uint64_t vuint64_t __attribute__ ((vector_size (32))); + +#define TEST(TYPE, NAME, INIT) \ + void \ + NAME##_##TYPE (TYPE *dest, __typeof__(dest[0][0]) *ptr) \ + { \ + TYPE x = { INIT }; \ + *dest = x; \ + } + +#define TEST_GROUP(TYPE, NAME, DUP) \ + TEST (TYPE, NAME_##m1, DUP (ptr[-1])) \ + TEST (TYPE, NAME_##0, DUP (ptr[0])) \ + TEST (TYPE, NAME_##63, DUP (ptr[63])) \ + TEST (TYPE, NAME_##64, DUP (ptr[64])) + +TEST_GROUP (vuint8_t, t8, DUP32) +TEST_GROUP (vuint16_t, t16, DUP16) +TEST_GROUP (vuint32_t, t16, DUP8) +TEST_GROUP (vuint64_t, t16, DUP4) + +/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 63\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 64\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 126\]\n} } } */ +/* 
{ dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 128\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 252\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 256\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 504\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 512\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.C deleted file mode 100644 index d209b48d249..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_2.C +++ /dev/null @@ -1,51 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256" } */ - -#define DUP4(X) X, X, X, X -#define DUP8(X) DUP4 (X), DUP4 (X) -#define DUP16(X) DUP8 (X), DUP8 (X) -#define DUP32(X) DUP16 (X), DUP16 (X) - -typedef unsigned char vuint8_t __attribute__ ((vector_size (32))); -typedef unsigned short vuint16_t __attribute__ ((vector_size (32))); -typedef unsigned int vuint32_t __attribute__ ((vector_size (32))); -typedef unsigned long vuint64_t __attribute__ ((vector_size (32))); - -#define TEST(TYPE, NAME, INIT) \ - void \ - NAME (TYPE *dest, __typeof__(dest[0][0]) *ptr) \ - { \ - TYPE x = { INIT }; \ - *dest = x; \ - } - -#define TEST_GROUP(TYPE, NAME, DUP) \ - TEST (TYPE, NAME_##m1, DUP (ptr[-1])) \ - TEST (TYPE, NAME_##0, DUP (ptr[0])) \ - TEST (TYPE, NAME_##63, DUP (ptr[63])) \ - TEST (TYPE, NAME_##64, DUP (ptr[64])) - -TEST_GROUP (vuint8_t, t8, DUP32) -TEST_GROUP (vuint16_t, t16, DUP16) -TEST_GROUP (vuint32_t, t16, DUP8) 
-TEST_GROUP (vuint64_t, t16, DUP4) - -/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]*.b, p[0-7]/z, \[x1, -1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rb\tz[0-9]*.b, p[0-7]/z, \[x1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rb\tz[0-9]*.b, p[0-7]/z, \[x1, 63\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]*.b, p[0-7]/z, \[x1, 64\]\n} } } */ - -/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]*.h, p[0-7]/z, \[x1, -1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rh\tz[0-9]*.h, p[0-7]/z, \[x1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rh\tz[0-9]*.h, p[0-7]/z, \[x1, 126\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]*.h, p[0-7]/z, \[x1, 128\]\n} } } */ - -/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]*.s, p[0-7]/z, \[x1, -1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rw\tz[0-9]*.s, p[0-7]/z, \[x1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rw\tz[0-9]*.s, p[0-7]/z, \[x1, 252\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]*.s, p[0-7]/z, \[x1, 256\]\n} } } */ - -/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]*.d, p[0-7]/z, \[x1, -1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rd\tz[0-9]*.d, p[0-7]/z, \[x1\]\n} } } */ -/* { dg-final { scan-assembler {\tld1rd\tz[0-9]*.d, p[0-7]/z, \[x1, 504\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]*.d, p[0-7]/z, \[x1, 512\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c index 3bbae95f332..0bc757907cf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c @@ -1,10 +1,12 @@ /* { dg-do assemble } */ -/* { dg-options "-O -march=armv8-a+sve -save-temps -msve-vector-bits=256" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef long v4di __attribute__ ((vector_size (32))); -typedef int v8si 
__attribute__ ((vector_size (32))); -typedef short v16hi __attribute__ ((vector_size (32))); -typedef char v32qi __attribute__ ((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__ ((vector_size (32))); +typedef int32_t v8si __attribute__ ((vector_size (32))); +typedef int16_t v16hi __attribute__ ((vector_size (32))); +typedef int8_t v32qi __attribute__ ((vector_size (32))); #define TEST_TYPE(TYPE) \ void sve_load_##TYPE##_neg9 (TYPE *a) \ @@ -52,26 +54,26 @@ TEST_TYPE (v32qi) /* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */ /* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 256\n} 4 } } */ -/* { dg-final { scan-assembler-not {\tld1d\tz0.d, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz0.d, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz0.d, p[0-7]/z, \[x0\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tld1d\tz0.d, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-not {\tld1d\tz0.d, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1w\tz0.s, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz0.s, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz0.s, p[0-7]/z, \[x0\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz0.s, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-not {\tld1w\tz0.s, p[0-7]/z, \[x0, #8, 
mul vl\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1h\tz0.h, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz0.h, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz0.h, p[0-7]/z, \[x0\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz0.h, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-not {\tld1h\tz0.h, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-not {\tld1b\tz0.b, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -/* { dg-final { scan-assembler-times {\tld1b\tz0.b, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tld1b\tz0.b, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ -/* { dg-final { scan-assembler-not {\tld1b\tz0.b, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, 
#-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c index 77a3ef82f62..9163702db1d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c @@ -1,68 +1,70 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef long v4di __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef char v32qi __attribute__((vector_size(32))); +#include <stdint.h> -void sve_load_64_u_lsl (unsigned long *a) +typedef int64_t v4di __attribute__ ((vector_size (32))); +typedef int32_t v8si __attribute__ ((vector_size (32))); +typedef int16_t v16hi __attribute__ ((vector_size (32))); +typedef int8_t v32qi __attribute__ ((vector_size (32))); + +void sve_load_64_u_lsl (uint64_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v4di *)&a[i])); } -void sve_load_64_s_lsl (signed long *a) +void sve_load_64_s_lsl (int64_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v4di *)&a[i])); } -void sve_load_32_u_lsl (unsigned int *a) +void sve_load_32_u_lsl (uint32_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v8si *)&a[i])); } -void sve_load_32_s_lsl (signed int *a) +void sve_load_32_s_lsl (int32_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v8si *)&a[i])); } -void sve_load_16_z_lsl (unsigned short *a) +void 
sve_load_16_z_lsl (uint16_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v16hi *)&a[i])); } -void sve_load_16_s_lsl (signed short *a) +void sve_load_16_s_lsl (int16_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v16hi *)&a[i])); } -void sve_load_8_z (unsigned char *a) +void sve_load_8_z (uint8_t *a) { register unsigned long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v32qi *)&a[i])); } -void sve_load_8_s (signed char *a) +void sve_load_8_s (int8_t *a) { register long i asm("x1"); asm volatile ("" : "=r" (i)); asm volatile ("" :: "w" (*(v32qi *)&a[i])); } -/* { dg-final { scan-assembler-times {\tld1d\tz0.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1w\tz0.s, p[0-7]/z, \[x0, x1, lsl 2\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1h\tz0.h, p[0-7]/z, \[x0, x1, lsl 1\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tld1b\tz0.b, p[0-7]/z, \[x0, x1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, x1, lsl 2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, x1, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, x1\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_2.c deleted file mode 100644 index 7a36cce95cd..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_2.c +++ /dev/null @@ -1,68 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ - -typedef long v4di __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef char v32qi 
__attribute__((vector_size(32))); - -void sve_load_64_u_lsl (unsigned long *a) -{ - register unsigned long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v4di *)&a[i])); -} - -void sve_load_64_s_lsl (signed long *a) -{ - register long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v4di *)&a[i])); -} - -void sve_load_32_u_lsl (unsigned int *a) -{ - register unsigned long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v8si *)&a[i])); -} - -void sve_load_32_s_lsl (signed int *a) -{ - register long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v8si *)&a[i])); -} - -void sve_load_16_z_lsl (unsigned short *a) -{ - register unsigned long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v16hi *)&a[i])); -} - -void sve_load_16_s_lsl (signed short *a) -{ - register long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v16hi *)&a[i])); -} - -void sve_load_8_z (unsigned char *a) -{ - register unsigned long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v32qi *)&a[i])); -} - -void sve_load_8_s (signed char *a) -{ - register long i asm("x1"); - asm volatile ("" : "=r" (i)); - asm volatile ("" :: "w" (*(v32qi *)&a[i])); -} - -/* { dg-final { scan-assembler-times "ld1d\\tz0.d, p\[0-9\]+/z, \\\[x0, x1, lsl 3\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "ld1w\\tz0.s, p\[0-9\]+/z, \\\[x0, x1, lsl 2\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "ld1h\\tz0.h, p\[0-9\]+/z, \\\[x0, x1, lsl 1\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "ld1b\\tz0.b, p\[0-9\]+/z, \\\[x0, x1\\\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c b/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c index 02f92b95733..aa39adf85f8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c @@ -1,82 +1,82 @@ /* { dg-do assemble } */ 
-/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -#define DO_CONSTANT(VALUE, TYPE, OP, NAME) \ -void vlogical_imm_##NAME##_##TYPE (TYPE* dst, unsigned long count) \ -{ \ - for (int i = 0; i < count; i++) \ - dst[i] = dst[i] OP VALUE; \ +#define DO_CONSTANT(VALUE, TYPE, OP, NAME) \ +void vlogical_imm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = dst[i] OP VALUE; \ } #define DO_LOGICAL_OPS_BRIEF(TYPE, OP, NAME) \ -DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ -DO_CONSTANT (2, TYPE, OP, NAME ## 2) \ -DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ -DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ -DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ -DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ -DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ -DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ -DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ -DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) + DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ + DO_CONSTANT (2, TYPE, OP, NAME ## 2) \ + DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ + DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ + DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ + DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ + DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ + DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ + DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ + DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) -#define DO_LOGICAL_OPS(TYPE, OP, NAME) \ -DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ -DO_CONSTANT (2, TYPE, OP, NAME ## 2) \ -DO_CONSTANT (3, TYPE, OP, NAME ## 3) \ -DO_CONSTANT (4, TYPE, OP, NAME ## 4) \ -DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ -DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ -DO_CONSTANT (7, TYPE, OP, NAME ## 7) \ -DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ -DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ -DO_CONSTANT (10, TYPE, OP, NAME ## 10) \ -DO_CONSTANT (11, TYPE, OP, NAME ## 11) \ -DO_CONSTANT (12, TYPE, OP, NAME ## 12) \ -DO_CONSTANT (13, TYPE, OP, NAME ## 13) \ -DO_CONSTANT (14, TYPE, OP, NAME 
## 14) \ -DO_CONSTANT (15, TYPE, OP, NAME ## 15) \ -DO_CONSTANT (16, TYPE, OP, NAME ## 16) \ -DO_CONSTANT (17, TYPE, OP, NAME ## 17) \ -DO_CONSTANT (18, TYPE, OP, NAME ## 18) \ -DO_CONSTANT (19, TYPE, OP, NAME ## 19) \ -DO_CONSTANT (20, TYPE, OP, NAME ## 20) \ -DO_CONSTANT (21, TYPE, OP, NAME ## 21) \ -DO_CONSTANT (22, TYPE, OP, NAME ## 22) \ -DO_CONSTANT (23, TYPE, OP, NAME ## 23) \ -DO_CONSTANT (24, TYPE, OP, NAME ## 24) \ -DO_CONSTANT (25, TYPE, OP, NAME ## 25) \ -DO_CONSTANT (26, TYPE, OP, NAME ## 26) \ -DO_CONSTANT (27, TYPE, OP, NAME ## 27) \ -DO_CONSTANT (28, TYPE, OP, NAME ## 28) \ -DO_CONSTANT (29, TYPE, OP, NAME ## 29) \ -DO_CONSTANT (30, TYPE, OP, NAME ## 30) \ -DO_CONSTANT (31, TYPE, OP, NAME ## 31) \ -DO_CONSTANT (32, TYPE, OP, NAME ## 32) \ -DO_CONSTANT (33, TYPE, OP, NAME ## 33) \ -DO_CONSTANT (34, TYPE, OP, NAME ## 34) \ -DO_CONSTANT (35, TYPE, OP, NAME ## 35) \ -DO_CONSTANT (252, TYPE, OP, NAME ## 252) \ -DO_CONSTANT (253, TYPE, OP, NAME ## 253) \ -DO_CONSTANT (254, TYPE, OP, NAME ## 254) \ -DO_CONSTANT (255, TYPE, OP, NAME ## 255) \ -DO_CONSTANT (256, TYPE, OP, NAME ## 256) \ -DO_CONSTANT (257, TYPE, OP, NAME ## 257) \ -DO_CONSTANT (65535, TYPE, OP, NAME ## 65535) \ -DO_CONSTANT (65536, TYPE, OP, NAME ## 65536) \ -DO_CONSTANT (65537, TYPE, OP, NAME ## 65537) \ -DO_CONSTANT (2147483646, TYPE, OP, NAME ## 2147483646) \ -DO_CONSTANT (2147483647, TYPE, OP, NAME ## 2147483647) \ -DO_CONSTANT (2147483648, TYPE, OP, NAME ## 2147483648) \ -DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ -DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ -DO_CONSTANT (-3, TYPE, OP, NAME ## minus3) \ -DO_CONSTANT (-4, TYPE, OP, NAME ## minus4) \ -DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ -DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) \ -DO_CONSTANT (-7, TYPE, OP, NAME ## minus7) \ -DO_CONSTANT (-8, TYPE, OP, NAME ## minus8) \ -DO_CONSTANT (-9, TYPE, OP, NAME ## minus9) +#define DO_LOGICAL_OPS(TYPE, OP, NAME) \ + DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ + DO_CONSTANT (2, TYPE, OP, 
NAME ## 2) \ + DO_CONSTANT (3, TYPE, OP, NAME ## 3) \ + DO_CONSTANT (4, TYPE, OP, NAME ## 4) \ + DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ + DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ + DO_CONSTANT (7, TYPE, OP, NAME ## 7) \ + DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ + DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ + DO_CONSTANT (10, TYPE, OP, NAME ## 10) \ + DO_CONSTANT (11, TYPE, OP, NAME ## 11) \ + DO_CONSTANT (12, TYPE, OP, NAME ## 12) \ + DO_CONSTANT (13, TYPE, OP, NAME ## 13) \ + DO_CONSTANT (14, TYPE, OP, NAME ## 14) \ + DO_CONSTANT (15, TYPE, OP, NAME ## 15) \ + DO_CONSTANT (16, TYPE, OP, NAME ## 16) \ + DO_CONSTANT (17, TYPE, OP, NAME ## 17) \ + DO_CONSTANT (18, TYPE, OP, NAME ## 18) \ + DO_CONSTANT (19, TYPE, OP, NAME ## 19) \ + DO_CONSTANT (20, TYPE, OP, NAME ## 20) \ + DO_CONSTANT (21, TYPE, OP, NAME ## 21) \ + DO_CONSTANT (22, TYPE, OP, NAME ## 22) \ + DO_CONSTANT (23, TYPE, OP, NAME ## 23) \ + DO_CONSTANT (24, TYPE, OP, NAME ## 24) \ + DO_CONSTANT (25, TYPE, OP, NAME ## 25) \ + DO_CONSTANT (26, TYPE, OP, NAME ## 26) \ + DO_CONSTANT (27, TYPE, OP, NAME ## 27) \ + DO_CONSTANT (28, TYPE, OP, NAME ## 28) \ + DO_CONSTANT (29, TYPE, OP, NAME ## 29) \ + DO_CONSTANT (30, TYPE, OP, NAME ## 30) \ + DO_CONSTANT (31, TYPE, OP, NAME ## 31) \ + DO_CONSTANT (32, TYPE, OP, NAME ## 32) \ + DO_CONSTANT (33, TYPE, OP, NAME ## 33) \ + DO_CONSTANT (34, TYPE, OP, NAME ## 34) \ + DO_CONSTANT (35, TYPE, OP, NAME ## 35) \ + DO_CONSTANT (252, TYPE, OP, NAME ## 252) \ + DO_CONSTANT (253, TYPE, OP, NAME ## 253) \ + DO_CONSTANT (254, TYPE, OP, NAME ## 254) \ + DO_CONSTANT (255, TYPE, OP, NAME ## 255) \ + DO_CONSTANT (256, TYPE, OP, NAME ## 256) \ + DO_CONSTANT (257, TYPE, OP, NAME ## 257) \ + DO_CONSTANT (65535, TYPE, OP, NAME ## 65535) \ + DO_CONSTANT (65536, TYPE, OP, NAME ## 65536) \ + DO_CONSTANT (65537, TYPE, OP, NAME ## 65537) \ + DO_CONSTANT (2147483646, TYPE, OP, NAME ## 2147483646) \ + DO_CONSTANT (2147483647, TYPE, OP, NAME ## 2147483647) \ + DO_CONSTANT (2147483648, TYPE, OP, NAME ## 
2147483648) \ + DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ + DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ + DO_CONSTANT (-3, TYPE, OP, NAME ## minus3) \ + DO_CONSTANT (-4, TYPE, OP, NAME ## minus4) \ + DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ + DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) \ + DO_CONSTANT (-7, TYPE, OP, NAME ## minus7) \ + DO_CONSTANT (-8, TYPE, OP, NAME ## minus8) \ + DO_CONSTANT (-9, TYPE, OP, NAME ## minus9) DO_LOGICAL_OPS_BRIEF (char, &, and) DO_LOGICAL_OPS_BRIEF (long, &, and) @@ -215,8 +215,7 @@ DO_LOGICAL_OPS (int, ^, xor) /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */ /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */ -/* No specific number because this also doubles as a move. */ -/* { dg-final { scan-assembler {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 22 } } */ /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */ /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c index 73d78bdc8be..5546cefe686 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c @@ -1,13 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve" } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ -__attribute__((noinline, noclone)) -void vadd (int *dst, int *op1, int *op2, int count) +void __attribute__((noinline, noclone)) +vadd (int *dst, int *op1, int *op2, int count) { for (int i = 0; i < count; ++i) dst[i] = op1[i] + op2[i]; } -/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s, p[0-7]/z,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+.s, p[0-7],} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tld1w\tz[0-9]+\.s, p[0-7]/z,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 1 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c index 6f06ce6e8a6..c7d0352e273 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c @@ -1,11 +1,12 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve" } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ #include "sve_loop_add_1.c" #define ELEMS 10 -int main (void) +int __attribute__ ((optimize (1))) +main (void) { int in1[ELEMS] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; int in2[ELEMS] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; @@ -16,7 +17,7 @@ int main (void) for (int i = 0; i < ELEMS; ++i) if (out[i] != check[i]) - return 1; + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c index 30891703a63..a27bde6f9da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c @@ -12,8 +12,8 @@ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]+, \[x[0-9]+, x[0-9]+\]} 8 } } */ /* The induction vector is invariant for steps of -16 and 16. 
*/ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #} 3 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #} 3 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #} 6 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 8 } } */ /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-16\n} 1 { xfail *-*-* } } } */ @@ -25,8 +25,8 @@ /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 1\]} 8 } } */ /* The (-)17 * 16 is out of range. */ -/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #} 3 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #} 3 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #} 4 } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 10 } } */ /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #-16\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c index 6da2e115782..ccb20b4191f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c @@ -1,34 +1,34 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef long v4di __attribute__((vector_size(32))); +#include <stdint.h> -#define DO_OP(TYPE) \ -void vmla##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm 
volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (dst * src1) + src2; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } -DO_OP(v32qi) -DO_OP(v16hi) -DO_OP(v8si) -DO_OP(v4di) +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) -/* { dg-final { scan-assembler-times {\tmad\tz0.b, p[0-7]/m, z2.b, z4.b} 1 } } */ -/* { dg-final { scan-assembler-times {\tmad\tz0.h, p[0-7]/m, z2.h, z4.h} 1 } } */ -/* { dg-final { scan-assembler-times {\tmad\tz0.s, p[0-7]/m, z2.s, z4.s} 1 } } */ -/* { dg-final { scan-assembler-times {\tmad\tz0.d, p[0-7]/m, z2.d, z4.d} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.C b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.c index f42eb6e9edf..733ffd1b765 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.c @@ -1,39 +1,45 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ 
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -#define DEF_MAXMIN(TYPE,NAME,CMP_OP) \ -void fun_##NAME##TYPE (TYPE *__restrict__ r, TYPE *__restrict__ a, \ - TYPE *__restrict__ b) \ -{ \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - r[i] = a[i] CMP_OP b[i] ? a[i] : b[i]; \ +#define DEF_MAXMIN(TYPE, NAME, CMP_OP) \ +void __attribute__ ((noinline, noclone)) \ +fun_##NAME##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ + TYPE *restrict b) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + r[i] = a[i] CMP_OP b[i] ? a[i] : b[i]; \ } -DEF_MAXMIN (int8_t, max, >) -DEF_MAXMIN (int16_t, max, >) -DEF_MAXMIN (int32_t, max, >) -DEF_MAXMIN (int64_t, max, >) -DEF_MAXMIN (uint8_t, max, >) -DEF_MAXMIN (uint16_t, max, >) -DEF_MAXMIN (uint32_t, max, >) -DEF_MAXMIN (uint64_t, max, >) -DEF_MAXMIN (float, max, >) -DEF_MAXMIN (double, max, >) - -DEF_MAXMIN (int8_t, min, <) -DEF_MAXMIN (int16_t, min, <) -DEF_MAXMIN (int32_t, min, <) -DEF_MAXMIN (int64_t, min, <) -DEF_MAXMIN (uint8_t, min, <) -DEF_MAXMIN (uint16_t, min, <) -DEF_MAXMIN (uint32_t, min, <) -DEF_MAXMIN (uint64_t, min, <) -DEF_MAXMIN (float, min, <) -DEF_MAXMIN (double, min, <) +#define TEST_ALL(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_ALL (DEF_MAXMIN) /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 1 } } */ @@ -45,6 +51,7 @@ DEF_MAXMIN (double, min, <) /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ @@ -58,5 +65,6 @@ DEF_MAXMIN (double, min, <) /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.C deleted file mode 100644 index 37dc9a4cdec..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.C +++ /dev/null @@ -1,88 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ - -#include "sve_maxmin_1.C" - -#include <stdlib.h> -#include <stdio.h> - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE a_##TYPE[NUM_ELEMS (TYPE)]; \ - TYPE b_##TYPE[NUM_ELEMS (TYPE)]; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - { \ - a_##TYPE[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ - b_##TYPE[i] = (1 + (i % 4)) * (i & 1 ? 
-1 : 1); \ - } - -#define TEST_MAX(RES,TYPE) \ -{ \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - fun_max##TYPE (r_##TYPE, a_##TYPE, b_##TYPE); \ - TYPE tmp = 0; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - tmp += r_##TYPE[i]; \ - (RES) += tmp; \ -} - -#define TEST_MIN(RES,TYPE) \ -{ \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - fun_max##TYPE (r_##TYPE, a_##TYPE, b_##TYPE); \ - TYPE tmp = 0; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - tmp += r_##TYPE[i]; \ - (RES) += tmp; \ -} - -int main () -{ - int result = 0; - double resultF = 0.0; - DEF_INIT_VECTOR (int8_t) - DEF_INIT_VECTOR (int16_t) - DEF_INIT_VECTOR (int32_t) - DEF_INIT_VECTOR (int64_t) - DEF_INIT_VECTOR (uint8_t) - DEF_INIT_VECTOR (uint16_t) - DEF_INIT_VECTOR (uint32_t) - DEF_INIT_VECTOR (uint64_t) - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_MIN (result, int8_t) - TEST_MIN (result, int16_t) - TEST_MIN (result, int32_t) - TEST_MIN (result, int64_t) - TEST_MIN (result, uint8_t) - TEST_MIN (result, uint16_t) - TEST_MIN (result, uint32_t) - TEST_MIN (result, uint64_t) - TEST_MIN (resultF, float) - TEST_MIN (resultF, double) - - TEST_MAX (result, int8_t) - TEST_MAX (result, int16_t) - TEST_MAX (result, int32_t) - TEST_MAX (result, int64_t) - TEST_MAX (result, uint8_t) - TEST_MAX (result, uint16_t) - TEST_MAX (result, uint32_t) - TEST_MAX (result, uint64_t) - TEST_MAX (resultF, float) - TEST_MAX (resultF, double) - - if (result != 131400) - { - fprintf (stderr, "result = %d\n", result); - abort (); - } - - if (resultF != 362) - { - fprintf (stderr, "resultF = %1.16lf\n", resultF); - abort (); - } - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c new file mode 100644 index 00000000000..d3130bff8fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ 
+ +#include "sve_maxmin_1.c" + +#define TEST_LOOP(TYPE, NAME, CMP_OP) \ + { \ + TYPE a[NUM_ELEMS (TYPE)]; \ + TYPE b[NUM_ELEMS (TYPE)]; \ + TYPE r[NUM_ELEMS (TYPE)]; \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + { \ + a[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ + b[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1); \ + asm volatile ("" ::: "memory"); \ + } \ + fun_##NAME##_##TYPE (r, a, b); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (r[i] != (a[i] CMP_OP b[i] ? a[i] : b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.C b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.c index 8a5ce725bf1..27561d19694 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.c @@ -1,23 +1,27 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include <math.h> #define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -#define DEF_MAXMIN(TYPE,FUN) \ -void test_##FUN (TYPE *__restrict__ r, TYPE *__restrict__ a, \ - TYPE *__restrict__ b) \ +#define DEF_MAXMIN(TYPE, FUN) \ +void __attribute__ ((noinline, noclone)) \ +test_##FUN##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ + TYPE *restrict b) \ { \ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ r[i] = FUN (a[i], b[i]); \ } -DEF_MAXMIN (float, fmaxf) -DEF_MAXMIN (double, fmax) +#define TEST_ALL(T) \ + T (float, fmaxf) \ + T (double, fmax) \ + \ + T (float, fminf) \ + T (double, fmin) -DEF_MAXMIN (float, fminf) -DEF_MAXMIN (double, fmin) +TEST_ALL (DEF_MAXMIN) /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.C 
b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.C deleted file mode 100644 index 06c868638e9..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.C +++ /dev/null @@ -1,56 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <stdlib.h> -#include <stdio.h> -#include "sve_maxmin_strict_1.C" - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE a_##TYPE[NUM_ELEMS (TYPE)]; \ - TYPE b_##TYPE[NUM_ELEMS (TYPE)]; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - { \ - a_##TYPE[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ - b_##TYPE[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1); \ - } - -#define TEST_MAX(RES,FUN,TYPE) \ -{ \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - test_##FUN (r_##TYPE, a_##TYPE, b_##TYPE); \ - TYPE tmp = 0; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - tmp += r_##TYPE[i]; \ - (RES) += tmp; \ -} - -#define TEST_MIN(RES,FUN,TYPE) \ -{ \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - test_##FUN (r_##TYPE, a_##TYPE, b_##TYPE); \ - TYPE tmp = 0; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - tmp += r_##TYPE[i]; \ - (RES) += tmp; \ -} - -int main () -{ - double resultF = 0.0; - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_MIN (resultF, fminf, float) - TEST_MIN (resultF, fmin, double) - - TEST_MAX (resultF, fmaxf, float) - TEST_MAX (resultF, fmax, double) - - if (resultF != -57) - { - fprintf (stderr, "resultF = %1.16lf\n", resultF); - abort (); - } - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c new file mode 100644 index 00000000000..2b869c62a5d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_maxmin_strict_1.c" + +#define TEST_LOOP(TYPE, FUN) \ + { \ + TYPE 
a[NUM_ELEMS (TYPE)]; \ + TYPE b[NUM_ELEMS (TYPE)]; \ + TYPE r[NUM_ELEMS (TYPE)]; \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + { \ + a[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ + b[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##FUN##_##TYPE (r, a, b); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (r[i] != FUN (a[i], b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c index 329cba68ffb..a4d705e38ba 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c @@ -1,26 +1,26 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef long v4di __attribute__((vector_size(32))); +#include <stdint.h> -#define DO_OP(TYPE) \ -void vmla##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = (src1 * src2) + dst; \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" 
(src2)); \ + dst = (src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } DO_OP (v32qi) @@ -28,7 +28,7 @@ DO_OP (v16hi) DO_OP (v8si) DO_OP (v4di) -/* { dg-final { scan-assembler-times {\tmla\tz0.b, p[0-7]/m, z2.b, z4.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmla\tz0.h, p[0-7]/m, z2.h, z4.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmla\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmla\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c index abcfc5f40e9..b7cc1dba087 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c @@ -1,26 +1,26 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef long v4di __attribute__((vector_size(32))); +#include <stdint.h> -#define DO_OP(TYPE) \ -void vmls##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ - dst = dst - (src1 * src2); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi 
__attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = dst - (src1 * src2); \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } DO_OP (v32qi) @@ -28,7 +28,7 @@ DO_OP (v16hi) DO_OP (v8si) DO_OP (v4di) -/* { dg-final { scan-assembler-times {\tmls\tz0.b, p[0-7]/m, z2.b, z4.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmls\tz0.h, p[0-7]/m, z2.h, z4.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmls\tz0.s, p[0-7]/m, z2.s, z4.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmls\tz0.d, p[0-7]/m, z2.d, z4.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c index d5a3a38442b..a38375af017 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c @@ -11,4 +11,4 @@ void sve_copy_rr (void) asm volatile ("#foo" :: "w" (y)); } -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+.d, z[0-9]+.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c index 132740b0866..fc05837a920 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c @@ -1,34 +1,34 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef char v32qi __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef long v4di __attribute__((vector_size(32))); +#include <stdint.h> -#define DO_OP(TYPE) \ -void vmla##TYPE (TYPE *_dst, TYPE _src1, TYPE _src2) \ -{ \ - register TYPE dst asm("z0"); \ - register TYPE src1 asm("z2"); \ - register TYPE src2 asm("z4"); \ - dst = *_dst; \ - asm volatile ("" :: "w" (dst)); \ - src1 = _src1; \ - asm volatile ("" :: "w" (src1)); \ - src2 = _src2; \ - asm volatile ("" :: "w" (src2)); \ +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ dst = src2 - (dst * src1); \ - asm volatile ("" :: "w" (dst)); \ - *_dst = dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ } -DO_OP(v32qi) -DO_OP(v16hi) -DO_OP(v8si) -DO_OP(v4di) +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) -/* { dg-final { scan-assembler-times {\tmsb\tz0.b, p[0-7]/m, z2.b, z4.b} 1 } } */ -/* { dg-final { scan-assembler-times {\tmsb\tz0.h, p[0-7]/m, z2.h, z4.h} 1 } } */ -/* { dg-final { scan-assembler-times {\tmsb\tz0.s, p[0-7]/m, z2.s, z4.s} 1 } } */ -/* { dg-final { scan-assembler-times {\tmsb\tz0.d, p[0-7]/m, z2.d, z4.d} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 
1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c index ae6f8688c58..2b1cd4a7a93 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c @@ -1,34 +1,36 @@ /* { dg-do assemble } */ -/* { dg-options {-std=c99 -O3 -march=armv8-a+sve --save-temps} } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> #define DO_REGREG_OPS(TYPE, OP, NAME) \ -void varith_##TYPE##_##NAME (TYPE* dst, TYPE* src, int count) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP src[i]; \ } #define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ -void varithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = dst[i] OP VALUE; \ } #define DO_ARITH_OPS(TYPE, OP, NAME) \ -DO_REGREG_OPS (TYPE, OP, NAME); \ -DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ -DO_IMMEDIATE_OPS (86, TYPE, OP, NAME ## 86); \ -DO_IMMEDIATE_OPS (109, TYPE, OP, NAME ## 109); \ -DO_IMMEDIATE_OPS (141, TYPE, OP, NAME ## 141); \ -DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); \ -DO_IMMEDIATE_OPS (-110, TYPE, OP, NAME ## minus110); \ -DO_IMMEDIATE_OPS (-141, TYPE, OP, NAME ## minus141); - -DO_ARITH_OPS (char, *, mul) -DO_ARITH_OPS (short, *, mul) -DO_ARITH_OPS (int, *, mul) -DO_ARITH_OPS (long, *, mul) + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ + DO_IMMEDIATE_OPS (86, TYPE, OP, NAME ## 86); \ + DO_IMMEDIATE_OPS (109, TYPE, OP, NAME ## 109); \ + DO_IMMEDIATE_OPS (141, TYPE, OP, NAME ## 141); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); \ + DO_IMMEDIATE_OPS (-110, TYPE, OP, NAME ## minus110); \ + 
DO_IMMEDIATE_OPS (-141, TYPE, OP, NAME ## minus141); + +DO_ARITH_OPS (int8_t, *, mul) +DO_ARITH_OPS (int16_t, *, mul) +DO_ARITH_OPS (int32_t, *, mul) +DO_ARITH_OPS (int64_t, *, mul) /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c b/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c index 8e5e8e58b07..b463c2c0580 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c @@ -1,17 +1,21 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> #define DO_OPS(TYPE) \ -void vneg_##TYPE (TYPE* dst, TYPE* src, int count) \ +void vneg_##TYPE (TYPE *dst, TYPE *src, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = -src[i]; \ } -DO_OPS (char) -DO_OPS (int) -DO_OPS (long) +DO_OPS (int8_t) +DO_OPS (int16_t) +DO_OPS (int32_t) +DO_OPS (int64_t) /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c index 8f54a2a3143..3871451bc1d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c @@ -1,25 +1,30 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ -#define DO_VNLOGICAL(TYPE) \ -void __attribute__ ((weak)) \ -vnlogical_not_##TYPE 
(TYPE *dst, unsigned long count) \ -{ \ - for (int i = 0; i < count; i++) \ - dst[i] = ~dst[i]; \ -} \ - \ -void __attribute__ ((weak)) \ -vnlogical_bic_##TYPE (TYPE *dst, TYPE *src, unsigned long count) \ -{ \ - for (int i = 0; i < count; i++) \ - dst[i] = dst[i] & ~src[i]; \ +#include <stdint.h> + +#define DO_VNLOGICAL(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +vnlogical_not_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = ~dst[i]; \ +} \ + \ +void __attribute__ ((noinline, noclone)) \ +vnlogical_bic_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = dst[i] & ~src[i]; \ } -DO_VNLOGICAL (char) -DO_VNLOGICAL (short) -DO_VNLOGICAL (int) -DO_VNLOGICAL (long) +#define TEST_ALL(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) + +TEST_ALL (DO_VNLOGICAL) /* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c index ca3f47134fa..905d44b8265 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c @@ -9,7 +9,10 @@ { \ TYPE dst[N], src[N]; \ for (int i = 0; i < N; ++i) \ - dst[i] = i ^ 42; \ + { \ + dst[i] = i ^ 42; \ + asm volatile ("" ::: "memory"); \ + } \ vnlogical_not_##TYPE (dst, N); \ for (int i = 0; i < N; ++i) \ if (dst[i] != (TYPE) ~(i ^ 42)) \ @@ -18,6 +21,7 @@ { \ dst[i] = i ^ 42; \ src[i] = i % 5; \ + asm volatile ("" ::: "memory"); \ } \ vnlogical_bic_##TYPE (dst, src, N); \ for (int i = 0; i < N; ++i) \ @@ -25,12 +29,9 @@ __builtin_abort (); \ } -int +int __attribute__ ((optimize (1))) main (void) { - TEST_VNLOGICAL (char); - TEST_VNLOGICAL (short); - TEST_VNLOGICAL (int); - TEST_VNLOGICAL (long); + TEST_ALL (TEST_VNLOGICAL) return 0; } diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c index 12fa945a794..723b4e3433b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c @@ -1,20 +1,25 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -#define PACK(TYPED, TYPES, SIGN) \ -void pack_##TYPED##_##TYPES##_##SIGN (SIGN TYPED *d, \ - SIGN TYPES *s, int size) \ -{ \ - for (int i = 0; i < size; i++) \ - d[i] = s[i] + 1; \ +#include <stdint.h> + +#define PACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +pack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ } -PACK (int, long, signed) \ -PACK (short, int, signed) \ -PACK (char, short, signed) \ -PACK (int, long, unsigned) \ -PACK (short, int, unsigned) \ -PACK (char, short, unsigned) +#define TEST_ALL(T) \ + T (int32_t, int64_t) \ + T (int16_t, int32_t) \ + T (int8_t, int16_t) \ + T (uint32_t, uint64_t) \ + T (uint16_t, uint32_t) \ + T (uint8_t, uint16_t) + +TEST_ALL (PACK) /* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c index 208889f86b8..cb7876cb135 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c @@ -1,46 +1,28 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_pack_1.c" #define ARRAY_SIZE 57 -#define RUN_AND_CHECK_LOOP(TYPED, 
TYPES, VALUED, VALUES) \ -{ \ - int value = 0; \ - TYPED arrayd[ARRAY_SIZE]; \ - TYPES arrays[ARRAY_SIZE]; \ - memset (arrayd, 67, ARRAY_SIZE * sizeof (TYPED)); \ - memset (arrays, VALUES, ARRAY_SIZE * sizeof (TYPES)); \ - pack_##TYPED##_##TYPES##_signed (arrayd, arrays, ARRAY_SIZE); \ - for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ - memset (arrayd, 74, ARRAY_SIZE*sizeof (TYPED)); \ - pack_##TYPED##_##TYPES##_unsigned (arrayd, arrays, ARRAY_SIZE); \ - for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ -} +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + pack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } -int main (void) +int __attribute__ ((optimize (1))) +main (void) { - int total = 5; - RUN_AND_CHECK_LOOP (char, short, total + 1, total); - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (short, int, total + 1, total); - total = (total << 8) + 5; - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (int, long, total + 1, total); + TEST_ALL (TEST_LOOP) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c index 2d1918cc2cd..a99d227e4c8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c @@ -1,7 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -void pack_int_double_plus_3 
(signed int *d, double *s, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +pack_int_double_plus_3 (int32_t *d, double *s, int size) { for (int i = 0; i < size; i++) d[i] = s[i] + 3; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c index 11d85fc8eb0..2a45bb5b1e8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c @@ -1,9 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_pack_fcvt_signed_1.c" @@ -14,19 +10,19 @@ int __attribute__ ((optimize (1))) main (void) { - static signed int array_dest[ARRAY_SIZE]; + static int32_t array_dest[ARRAY_SIZE]; double array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } pack_int_double_plus_3 (array_dest, array_source, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_dest[i] != (int) VAL1 + 3) - { - fprintf (stderr,"%d: %d != %d\n", i, array_dest[i], (int) VAL1 + 3); - exit (1); - } + if (array_dest[i] != (int32_t) VAL1 + 3) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c index f7692989a71..a039d6fdd66 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c @@ -1,7 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -void pack_int_double_plus_7 (unsigned int *d, double *s, int 
size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +pack_int_double_plus_7 (uint32_t *d, double *s, int size) { for (int i = 0; i < size; i++) d[i] = s[i] + 7; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c index 196b6de358a..8a1e72485ad 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c @@ -1,9 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_pack_fcvt_unsigned_1.c" @@ -14,19 +10,19 @@ int __attribute__ ((optimize (1))) main (void) { - static unsigned int array_dest[ARRAY_SIZE]; + static uint32_t array_dest[ARRAY_SIZE]; double array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } pack_int_double_plus_7 (array_dest, array_source, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_dest[i] != (int) VAL1 + 7) - { - fprintf (stderr,"%d: %d != %d\n", i, array_dest[i], (int) VAL1 + 7); - exit (1); - } + if (array_dest[i] != (uint32_t) VAL1 + 7) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c index 7faf7652e75..746154e530d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c @@ -1,7 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -void pack_float_plus_1point1 (float *d, double *s, int size) +void __attribute__ ((noinline, noclone)) 
+pack_float_plus_1point1 (float *d, double *s, int size) { for (int i = 0; i < size; i++) d[i] = s[i] + 1.1; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c index 85a7eca9173..91e8a699f0b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c @@ -1,9 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_pack_float_1.c" @@ -18,16 +14,15 @@ main (void) double array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } pack_float_plus_1point1 (array_dest, array_source, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) if (array_dest[i] != (float) (VAL1 + 1.1)) - { - fprintf (stderr, "%d: %f != %f\n", i, array_dest[i], - (float) (VAL1 + 1.1)); - exit (1); - } + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c index 0e640dab810..c3bb2756b2a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c @@ -1,16 +1,17 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ -void -popcount_32 (unsigned int *restrict dst, unsigned int *restrict src, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +popcount_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) { for (int i = 0; i < size; ++i) dst[i] = __builtin_popcount (src[i]); } -void -popcount_64 (unsigned int *restrict dst, unsigned long *restrict src, - int size) +void __attribute__ ((noinline, noclone)) +popcount_64 
(unsigned int *restrict dst, uint64_t *restrict src, int size) { for (int i = 0; i < size; ++i) dst[i] = __builtin_popcountl (src[i]); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c index 9ef47bcbf2c..6be828fa81a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_popcount_1.c" @@ -16,24 +16,31 @@ unsigned int data[] = { 0x0, 0 }; -int +int __attribute__ ((optimize (1))) main (void) { unsigned int count = sizeof (data) / sizeof (data[0]) / 2; - unsigned int in32[count], out32[count]; + uint32_t in32[count]; + unsigned int out32[count]; for (unsigned int i = 0; i < count; ++i) - in32[i] = data[i * 2]; + { + in32[i] = data[i * 2]; + asm volatile ("" ::: "memory"); + } popcount_32 (out32, in32, count); for (unsigned int i = 0; i < count; ++i) if (out32[i] != data[i * 2 + 1]) abort (); count /= 2; - unsigned long in64[count]; + uint64_t in64[count]; unsigned int out64[count]; for (unsigned int i = 0; i < count; ++i) - in64[i] = ((unsigned long) data[i * 4] << 32) | data[i * 4 + 2]; + { + in64[i] = ((uint64_t) data[i * 4] << 32) | data[i * 4 + 2]; + asm volatile ("" ::: "memory"); + } popcount_64 (out64, in64, count); for (unsigned int i = 0; i < count; ++i) if (out64[i] != data[i * 4 + 1] + data[i * 4 + 3]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c index da3b5fa1963..4c26e78fae8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c @@ -1,10 +1,11 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ +/* { 
dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include <stdint.h> #define DEF_REDUC_PLUS(TYPE) \ -TYPE reduc_plus_##TYPE (TYPE *a, int n) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE *a, int n) \ { \ TYPE r = 0; \ for (int i = 0; i < n; ++i) \ @@ -12,19 +13,24 @@ TYPE reduc_plus_##TYPE (TYPE *a, int n) \ return r; \ } -DEF_REDUC_PLUS (int8_t) -DEF_REDUC_PLUS (int16_t) -DEF_REDUC_PLUS (int32_t) -DEF_REDUC_PLUS (int64_t) -DEF_REDUC_PLUS (uint8_t) -DEF_REDUC_PLUS (uint16_t) -DEF_REDUC_PLUS (uint32_t) -DEF_REDUC_PLUS (uint64_t) -DEF_REDUC_PLUS (float) -DEF_REDUC_PLUS (double) - -#define DEF_REDUC_MAXMIN(TYPE,NAME,CMP_OP) \ -TYPE reduc_##NAME##TYPE (TYPE *a, int n) \ +#define TEST_PLUS(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_PLUS (DEF_REDUC_PLUS) + +#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ { \ TYPE r = 13; \ for (int i = 0; i < n; ++i) \ @@ -32,30 +38,36 @@ TYPE reduc_##NAME##TYPE (TYPE *a, int n) \ return r; \ } -DEF_REDUC_MAXMIN (int8_t, max, >) -DEF_REDUC_MAXMIN (int16_t, max, >) -DEF_REDUC_MAXMIN (int32_t, max, >) -DEF_REDUC_MAXMIN (int64_t, max, >) -DEF_REDUC_MAXMIN (uint8_t, max, >) -DEF_REDUC_MAXMIN (uint16_t, max, >) -DEF_REDUC_MAXMIN (uint32_t, max, >) -DEF_REDUC_MAXMIN (uint64_t, max, >) -DEF_REDUC_MAXMIN (float, max, >) -DEF_REDUC_MAXMIN (double, max, >) - -DEF_REDUC_MAXMIN (int8_t, min, <) -DEF_REDUC_MAXMIN (int16_t, min, <) -DEF_REDUC_MAXMIN (int32_t, min, <) -DEF_REDUC_MAXMIN (int64_t, min, <) -DEF_REDUC_MAXMIN (uint8_t, min, <) -DEF_REDUC_MAXMIN (uint16_t, min, <) -DEF_REDUC_MAXMIN (uint32_t, min, <) -DEF_REDUC_MAXMIN (uint64_t, min, <) -DEF_REDUC_MAXMIN (float, min, <) -DEF_REDUC_MAXMIN (double, min, <) - -#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP) \ -TYPE 
reduc_##NAME##TYPE (TYPE *a, int n) \ +#define TEST_MAXMIN(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_MAXMIN (DEF_REDUC_MAXMIN) + +#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ { \ TYPE r = 13; \ for (int i = 0; i < n; ++i) \ @@ -63,80 +75,93 @@ TYPE reduc_##NAME##TYPE (TYPE *a, int n) \ return r; \ } -DEF_REDUC_BITWISE (int8_t, and, &=) -DEF_REDUC_BITWISE (int16_t, and, &=) -DEF_REDUC_BITWISE (int32_t, and, &=) -DEF_REDUC_BITWISE (int64_t, and, &=) -DEF_REDUC_BITWISE (uint8_t, and, &=) -DEF_REDUC_BITWISE (uint16_t, and, &=) -DEF_REDUC_BITWISE (uint32_t, and, &=) -DEF_REDUC_BITWISE (uint64_t, and, &=) - -DEF_REDUC_BITWISE (int8_t, ior, |=) -DEF_REDUC_BITWISE (int16_t, ior, |=) -DEF_REDUC_BITWISE (int32_t, ior, |=) -DEF_REDUC_BITWISE (int64_t, ior, |=) -DEF_REDUC_BITWISE (uint8_t, ior, |=) -DEF_REDUC_BITWISE (uint16_t, ior, |=) -DEF_REDUC_BITWISE (uint32_t, ior, |=) -DEF_REDUC_BITWISE (uint64_t, ior, |=) - -DEF_REDUC_BITWISE (int8_t, xor, ^=) -DEF_REDUC_BITWISE (int16_t, xor, ^=) -DEF_REDUC_BITWISE (int32_t, xor, ^=) -DEF_REDUC_BITWISE (int64_t, xor, ^=) -DEF_REDUC_BITWISE (uint8_t, xor, ^=) -DEF_REDUC_BITWISE (uint16_t, xor, ^=) -DEF_REDUC_BITWISE (uint32_t, xor, ^=) -DEF_REDUC_BITWISE (uint64_t, xor, ^=) - -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, 
z[0-9]\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 2 } } */ - -/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 1 } } */ -/* { dg-final { 
scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 1 } } */ - -/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 2 } } */ - -/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 2 } } */ - -/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]\.b, z[0-9]\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]\.h, z[0-9]\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]\.s, z[0-9]\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]\.d, z[0-9]\.d\n} 2 } } */ +#define TEST_BITWISE(T) \ + T (int8_t, and, &=) \ + T (int16_t, and, &=) \ + T (int32_t, and, &=) \ + T (int64_t, and, &=) \ + T (uint8_t, and, &=) \ + T (uint16_t, and, &=) \ + T (uint32_t, and, &=) \ + T (uint64_t, and, &=) \ + \ + T (int8_t, ior, |=) \ + T (int16_t, ior, |=) \ + T (int32_t, ior, |=) \ + T (int64_t, ior, |=) \ + T (uint8_t, ior, |=) \ + T (uint16_t, ior, |=) \ + T (uint32_t, ior, |=) \ + T (uint64_t, ior, |=) \ + \ + T (int8_t, xor, ^=) \ + T (int16_t, xor, ^=) \ + T (int32_t, xor, ^=) \ + T (int64_t, xor, ^=) \ + T (uint8_t, xor, ^=) \ + T (uint16_t, xor, ^=) \ + T (uint32_t, xor, ^=) \ + T (uint64_t, xor, ^=) + +TEST_BITWISE (DEF_REDUC_BITWISE) + +/* { dg-final { 
scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ @@ -148,6 +173,7 @@ DEF_REDUC_BITWISE (uint64_t, xor, ^=) /* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ @@ -159,6 +185,7 @@ DEF_REDUC_BITWISE (uint64_t, xor, ^=) /* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { 
dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.C deleted file mode 100644 index 17c978de7f7..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.C +++ /dev/null @@ -1,117 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ - -#include "sve_reduc_1.C" - -#include <stdlib.h> -#include <stdio.h> - -#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE r_##TYPE[NUM_ELEMS (TYPE) + 1]; \ - for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++) \ - r_##TYPE[i] = (i * 2) * (i & 1 ? 1 : -1); - -#define TEST_REDUC_PLUS(RES,TYPE) \ - (RES) += reduc_plus_##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); -#define TEST_REDUC_MAX(RES,TYPE) \ - (RES) += reduc_max##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); -#define TEST_REDUC_MIN(RES,TYPE) \ - (RES) += reduc_min##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); -#define TEST_REDUC_AND(RES,TYPE) \ - (RES) += reduc_and##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); -#define TEST_REDUC_IOR(RES,TYPE) \ - (RES) += reduc_ior##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); -#define TEST_REDUC_XOR(RES,TYPE) \ - (RES) += reduc_xor##TYPE (r_##TYPE, NUM_ELEMS (TYPE)); - -int main () -{ - int result = 0; - double resultF = 0.0; - DEF_INIT_VECTOR (int8_t) - DEF_INIT_VECTOR (int16_t) - DEF_INIT_VECTOR (int32_t) - DEF_INIT_VECTOR (int64_t) - DEF_INIT_VECTOR (uint8_t) - DEF_INIT_VECTOR (uint16_t) - DEF_INIT_VECTOR (uint32_t) - DEF_INIT_VECTOR (uint64_t) - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_REDUC_PLUS (result, int8_t) - TEST_REDUC_PLUS (result, int16_t) - TEST_REDUC_PLUS (result, int32_t) - TEST_REDUC_PLUS (result, int64_t) - TEST_REDUC_PLUS (result, uint8_t) - TEST_REDUC_PLUS (result, uint16_t) - TEST_REDUC_PLUS (result, uint32_t) - TEST_REDUC_PLUS (result, uint64_t) - TEST_REDUC_PLUS 
(resultF, float) - TEST_REDUC_PLUS (resultF, double) - - TEST_REDUC_MIN (result, int8_t) - TEST_REDUC_MIN (result, int16_t) - TEST_REDUC_MIN (result, int32_t) - TEST_REDUC_MIN (result, int64_t) - TEST_REDUC_MIN (result, uint8_t) - TEST_REDUC_MIN (result, uint16_t) - TEST_REDUC_MIN (result, uint32_t) - TEST_REDUC_MIN (result, uint64_t) - TEST_REDUC_MIN (resultF, float) - TEST_REDUC_MIN (resultF, double) - - TEST_REDUC_MAX (result, int8_t) - TEST_REDUC_MAX (result, int16_t) - TEST_REDUC_MAX (result, int32_t) - TEST_REDUC_MAX (result, int64_t) - TEST_REDUC_MAX (result, uint8_t) - TEST_REDUC_MAX (result, uint16_t) - TEST_REDUC_MAX (result, uint32_t) - TEST_REDUC_MAX (result, uint64_t) - TEST_REDUC_MAX (resultF, float) - TEST_REDUC_MAX (resultF, double) - - TEST_REDUC_AND (result, int8_t) - TEST_REDUC_AND (result, int16_t) - TEST_REDUC_AND (result, int32_t) - TEST_REDUC_AND (result, int64_t) - TEST_REDUC_AND (result, uint8_t) - TEST_REDUC_AND (result, uint16_t) - TEST_REDUC_AND (result, uint32_t) - TEST_REDUC_AND (result, uint64_t) - - TEST_REDUC_IOR (result, int8_t) - TEST_REDUC_IOR (result, int16_t) - TEST_REDUC_IOR (result, int32_t) - TEST_REDUC_IOR (result, int64_t) - TEST_REDUC_IOR (result, uint8_t) - TEST_REDUC_IOR (result, uint16_t) - TEST_REDUC_IOR (result, uint32_t) - TEST_REDUC_IOR (result, uint64_t) - - TEST_REDUC_XOR (result, int8_t) - TEST_REDUC_XOR (result, int16_t) - TEST_REDUC_XOR (result, int32_t) - TEST_REDUC_XOR (result, int64_t) - TEST_REDUC_XOR (result, uint8_t) - TEST_REDUC_XOR (result, uint16_t) - TEST_REDUC_XOR (result, uint32_t) - TEST_REDUC_XOR (result, uint64_t) - - if (result != 262400) - { - fprintf (stderr, "result = %d\n", result); - abort (); - } - - if (resultF != -160) - { - fprintf (stderr, "resultF = %1.16lf\n", resultF); - abort (); - } - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c new file mode 100644 index 00000000000..9f4afbcf3a7 --- /dev/null 
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include "sve_reduc_1.c" + +#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) + +#define INIT_VECTOR(TYPE) \ + TYPE a[NUM_ELEMS (TYPE) + 1]; \ + for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++) \ + { \ + a[i] = ((i * 2) * (i & 1 ? 1 : -1) | 3); \ + asm volatile ("" ::: "memory"); \ + } + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_plus_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 0; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 += a[i]; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 13; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 = a[i] CMP_OP r2 ? a[i] : r2; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 13; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 BIT_OP a[i]; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_PLUS (TEST_REDUC_PLUS) + TEST_MAXMIN (TEST_REDUC_MAXMIN) + TEST_BITWISE (TEST_REDUC_BITWISE) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.c index 6ac37570164..669306549d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.c @@ -1,109 +1,126 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) -#define 
DEF_REDUC_PLUS(TYPE) \ -void reduc_plus_##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS (TYPE)], \ - TYPE *__restrict__ r, int n) \ -{ \ - for (int i = 0; i < n; i++) \ - { \ - r[i] = 0; \ - for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ - r[i] += a[i][j]; \ - } \ +#define DEF_REDUC_PLUS(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = 0; \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + r[i] += a[i][j]; \ + } \ } -DEF_REDUC_PLUS (int8_t) -DEF_REDUC_PLUS (int16_t) -DEF_REDUC_PLUS (int32_t) -DEF_REDUC_PLUS (int64_t) -DEF_REDUC_PLUS (uint8_t) -DEF_REDUC_PLUS (uint16_t) -DEF_REDUC_PLUS (uint32_t) -DEF_REDUC_PLUS (uint64_t) -DEF_REDUC_PLUS (float) -DEF_REDUC_PLUS (double) - -#define DEF_REDUC_MAXMIN(TYPE,NAME,CMP_OP) \ -void reduc_##NAME##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS (TYPE)], \ - TYPE *__restrict__ r, int n) \ -{ \ - for (int i = 0; i < n; i++) \ - { \ - r[i] = a[i][0]; \ - for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ - r[i] = a[i][j] CMP_OP r[i] ? a[i][j] : r[i]; \ - } \ +#define TEST_PLUS(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_PLUS (DEF_REDUC_PLUS) + +#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ +void __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = a[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + r[i] = a[i][j] CMP_OP r[i] ? 
a[i][j] : r[i]; \ + } \ } -DEF_REDUC_MAXMIN (int8_t, max, >) -DEF_REDUC_MAXMIN (int16_t, max, >) -DEF_REDUC_MAXMIN (int32_t, max, >) -DEF_REDUC_MAXMIN (int64_t, max, >) -DEF_REDUC_MAXMIN (uint8_t, max, >) -DEF_REDUC_MAXMIN (uint16_t, max, >) -DEF_REDUC_MAXMIN (uint32_t, max, >) -DEF_REDUC_MAXMIN (uint64_t, max, >) -DEF_REDUC_MAXMIN (float, max, >) -DEF_REDUC_MAXMIN (double, max, >) - -DEF_REDUC_MAXMIN (int8_t, min, <) -DEF_REDUC_MAXMIN (int16_t, min, <) -DEF_REDUC_MAXMIN (int32_t, min, <) -DEF_REDUC_MAXMIN (int64_t, min, <) -DEF_REDUC_MAXMIN (uint8_t, min, <) -DEF_REDUC_MAXMIN (uint16_t, min, <) -DEF_REDUC_MAXMIN (uint32_t, min, <) -DEF_REDUC_MAXMIN (uint64_t, min, <) -DEF_REDUC_MAXMIN (float, min, <) -DEF_REDUC_MAXMIN (double, min, <) - -#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP)\ -void reduc_##NAME##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\ -{\ - for (int i = 0; i < n; i++)\ - {\ - r[i] = a[i][0];\ - for (int j = 0; j < NUM_ELEMS(TYPE); j++)\ - r[i] BIT_OP a[i][j];\ - }\ -}\ - -DEF_REDUC_BITWISE (int8_t, and, &=) -DEF_REDUC_BITWISE (int16_t, and, &=) -DEF_REDUC_BITWISE (int32_t, and, &=) -DEF_REDUC_BITWISE (int64_t, and, &=) -DEF_REDUC_BITWISE (uint8_t, and, &=) -DEF_REDUC_BITWISE (uint16_t, and, &=) -DEF_REDUC_BITWISE (uint32_t, and, &=) -DEF_REDUC_BITWISE (uint64_t, and, &=) - -DEF_REDUC_BITWISE (int8_t, ior, |=) -DEF_REDUC_BITWISE (int16_t, ior, |=) -DEF_REDUC_BITWISE (int32_t, ior, |=) -DEF_REDUC_BITWISE (int64_t, ior, |=) -DEF_REDUC_BITWISE (uint8_t, ior, |=) -DEF_REDUC_BITWISE (uint16_t, ior, |=) -DEF_REDUC_BITWISE (uint32_t, ior, |=) -DEF_REDUC_BITWISE (uint64_t, ior, |=) - -DEF_REDUC_BITWISE (int8_t, xor, ^=) -DEF_REDUC_BITWISE (int16_t, xor, ^=) -DEF_REDUC_BITWISE (int32_t, xor, ^=) -DEF_REDUC_BITWISE (int64_t, xor, ^=) -DEF_REDUC_BITWISE (uint8_t, xor, ^=) -DEF_REDUC_BITWISE (uint16_t, xor, ^=) -DEF_REDUC_BITWISE (uint32_t, xor, ^=) -DEF_REDUC_BITWISE (uint64_t, xor, ^=) +#define TEST_MAXMIN(T) \ + T (int8_t, max, >) 
\ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_MAXMIN (DEF_REDUC_MAXMIN) + +#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP) \ +void __attribute__ ((noinline, noclone)) \ +reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = a[i][0]; \ + for (int j = 0; j < NUM_ELEMS(TYPE); j++) \ + r[i] BIT_OP a[i][j]; \ + } \ +} + +#define TEST_BITWISE(T) \ + T (int8_t, and, &=) \ + T (int16_t, and, &=) \ + T (int32_t, and, &=) \ + T (int64_t, and, &=) \ + T (uint8_t, and, &=) \ + T (uint16_t, and, &=) \ + T (uint32_t, and, &=) \ + T (uint64_t, and, &=) \ + \ + T (int8_t, ior, |=) \ + T (int16_t, ior, |=) \ + T (int32_t, ior, |=) \ + T (int64_t, ior, |=) \ + T (uint8_t, ior, |=) \ + T (uint16_t, ior, |=) \ + T (uint32_t, ior, |=) \ + T (uint64_t, ior, |=) \ + \ + T (int8_t, xor, ^=) \ + T (int16_t, xor, ^=) \ + T (int32_t, xor, ^=) \ + T (int64_t, xor, ^=) \ + T (uint8_t, xor, ^=) \ + T (uint16_t, xor, ^=) \ + T (uint32_t, xor, ^=) \ + T (uint64_t, xor, ^=) + +TEST_BITWISE (DEF_REDUC_BITWISE) /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { 
scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ @@ -115,6 +132,7 @@ DEF_REDUC_BITWISE (uint64_t, xor, ^=) /* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ @@ -126,6 +144,7 @@ DEF_REDUC_BITWISE (uint64_t, xor, ^=) /* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.C deleted file mode 100644 index 6f170fb0de6..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.C +++ /dev/null @@ -1,135 +0,0 @@ -/* { dg-do run { target { aarch64_sve_hw } } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ - -#include "sve_reduc_2.C" - -#include <stdlib.h> -#include <stdio.h> -#include <math.h> - -#define NROWS 5 - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE mat_##TYPE[NROWS][NUM_ELEMS (TYPE)]; \ - TYPE r_##TYPE[NROWS]; \ - for (int i = 0; i < NROWS; i++) \ - 
for (int j = 0; j < NUM_ELEMS (TYPE); j++ ) \ - mat_##TYPE[i][j] = i + (j * 2) * (j & 1 ? 1 : -1); - -#define TEST_REDUC_PLUS(TYPE) reduc_plus_##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_MAX(TYPE) reduc_max##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_MIN(TYPE) reduc_min##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_AND(TYPE) reduc_and##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_IOR(TYPE) reduc_ior##TYPE (mat_##TYPE, r_##TYPE, NROWS); -#define TEST_REDUC_XOR(TYPE) reduc_xor##TYPE (mat_##TYPE, r_##TYPE, NROWS); - -#define SUM_VECTOR(RES, TYPE)\ - for (int i = 0; i < NROWS; i++)\ - (RES) += r_##TYPE[i]; - -#define SUM_INT_RESULT(RES)\ - SUM_VECTOR (RES, int8_t);\ - SUM_VECTOR (RES, int16_t);\ - SUM_VECTOR (RES, int32_t);\ - SUM_VECTOR (RES, int64_t);\ - SUM_VECTOR (RES, uint8_t);\ - SUM_VECTOR (RES, uint16_t);\ - SUM_VECTOR (RES, uint32_t);\ - SUM_VECTOR (RES, uint64_t);\ - -#define SUM_FLOAT_RESULT(RES)\ - SUM_VECTOR (RES, float);\ - SUM_VECTOR (RES, double);\ - -int main () -{ - int result = 0; - double resultF = 0.0; - DEF_INIT_VECTOR (int8_t) - DEF_INIT_VECTOR (int16_t) - DEF_INIT_VECTOR (int32_t) - DEF_INIT_VECTOR (int64_t) - DEF_INIT_VECTOR (uint8_t) - DEF_INIT_VECTOR (uint16_t) - DEF_INIT_VECTOR (uint32_t) - DEF_INIT_VECTOR (uint64_t) - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_REDUC_PLUS (int8_t) - TEST_REDUC_PLUS (int16_t) - TEST_REDUC_PLUS (int32_t) - TEST_REDUC_PLUS (int64_t) - TEST_REDUC_PLUS (uint8_t) - TEST_REDUC_PLUS (uint16_t) - TEST_REDUC_PLUS (uint32_t) - TEST_REDUC_PLUS (uint64_t) - TEST_REDUC_PLUS (float) - TEST_REDUC_PLUS (double) - - SUM_INT_RESULT (result); - SUM_FLOAT_RESULT (resultF); - - TEST_REDUC_MIN (int8_t) - TEST_REDUC_MIN (int16_t) - TEST_REDUC_MIN (int32_t) - TEST_REDUC_MIN (int64_t) - TEST_REDUC_MIN (uint8_t) - TEST_REDUC_MIN (uint16_t) - TEST_REDUC_MIN (uint32_t) - TEST_REDUC_MIN (uint64_t) - TEST_REDUC_MIN (float) - TEST_REDUC_MIN (double) - - TEST_REDUC_MAX 
(int8_t) - TEST_REDUC_MAX (int16_t) - TEST_REDUC_MAX (int32_t) - TEST_REDUC_MAX (int64_t) - TEST_REDUC_MAX (uint8_t) - TEST_REDUC_MAX (uint16_t) - TEST_REDUC_MAX (uint32_t) - TEST_REDUC_MAX (uint64_t) - TEST_REDUC_MAX (float) - TEST_REDUC_MAX (double) - - TEST_REDUC_AND (int8_t) - TEST_REDUC_AND (int16_t) - TEST_REDUC_AND (int32_t) - TEST_REDUC_AND (int64_t) - TEST_REDUC_AND (uint8_t) - TEST_REDUC_AND (uint16_t) - TEST_REDUC_AND (uint32_t) - TEST_REDUC_AND (uint64_t) - - TEST_REDUC_IOR (int8_t) - TEST_REDUC_IOR (int16_t) - TEST_REDUC_IOR (int32_t) - TEST_REDUC_IOR (int64_t) - TEST_REDUC_IOR (uint8_t) - TEST_REDUC_IOR (uint16_t) - TEST_REDUC_IOR (uint32_t) - TEST_REDUC_IOR (uint64_t) - - TEST_REDUC_XOR (int8_t) - TEST_REDUC_XOR (int16_t) - TEST_REDUC_XOR (int32_t) - TEST_REDUC_XOR (int64_t) - TEST_REDUC_XOR (uint8_t) - TEST_REDUC_XOR (uint16_t) - TEST_REDUC_XOR (uint32_t) - TEST_REDUC_XOR (uint64_t) - - if (result != 26880) - { - fprintf (stderr, "result = %d\n", result); - abort (); - } - - if (resultF != double (5760)) - { - fprintf (stderr, "resultF = %1.16lf\n", resultF); - abort (); - } - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c new file mode 100644 index 00000000000..041db66c8cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c @@ -0,0 +1,79 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include "sve_reduc_2.c" + +#define NROWS 53 + +/* -ffast-math fuzz for PLUS. 
*/ +#define CMP__Float16(X, Y) ((X) >= (Y) * 0.875 && (X) <= (Y) * 1.125) +#define CMP_float(X, Y) ((X) == (Y)) +#define CMP_double(X, Y) ((X) == (Y)) +#define CMP_int8_t(X, Y) ((X) == (Y)) +#define CMP_int16_t(X, Y) ((X) == (Y)) +#define CMP_int32_t(X, Y) ((X) == (Y)) +#define CMP_int64_t(X, Y) ((X) == (Y)) +#define CMP_uint8_t(X, Y) ((X) == (Y)) +#define CMP_uint16_t(X, Y) ((X) == (Y)) +#define CMP_uint32_t(X, Y) ((X) == (Y)) +#define CMP_uint64_t(X, Y) ((X) == (Y)) + +#define INIT_MATRIX(TYPE) \ + TYPE mat[NROWS][NUM_ELEMS (TYPE)]; \ + TYPE r[NROWS]; \ + for (int i = 0; i < NROWS; i++) \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + { \ + mat[i][j] = i + (j * 2) * (j & 1 ? 1 : -1); \ + asm volatile ("" ::: "memory"); \ + } + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_plus_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = 0; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 += mat[i][j]; \ + if (!CMP_##TYPE (r[i], r2)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_##NAME##_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = mat[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 = mat[i][j] CMP_OP r2 ? 
mat[i][j] : r2; \ + if (r[i] != r2) \ + __builtin_abort (); \ + } \ + } + +#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_##NAME##_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = mat[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 BIT_OP mat[i][j]; \ + if (r[i] != r2) \ + __builtin_abort (); \ + } \ + } + +int main () +{ + TEST_PLUS (TEST_REDUC_PLUS) + TEST_MAXMIN (TEST_REDUC_MAXMIN) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c index 9e997adedca..7daf3ae130e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c @@ -1,18 +1,52 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ -double -f (double *restrict a, double *restrict b, int *lookup) -{ - double res = 0.0; - for (int i = 0; i < 512; ++i) - res += a[lookup[i]] * b[i]; - return res; +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) + +#define REDUC_PTR(DSTTYPE, SRCTYPE) \ +void reduc_ptr_##DSTTYPE##_##SRCTYPE (DSTTYPE *restrict sum, \ + SRCTYPE *restrict array, \ + int count) \ +{ \ + *sum = 0; \ + for (int i = 0; i < count; ++i) \ + *sum += array[i]; \ } -/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+.d, p[0-7]/m, } 2 } } */ -/* Check that the vector instructions are the only instructions. */ -/* { dg-final { scan-assembler-times {\tfmla\t} 2 } } */ -/* { dg-final { scan-assembler-not {\tfadd\t} } } */ -/* { dg-final { scan-assembler-times {\tfaddv\td0,} 1 } } */ -/* { dg-final { scan-assembler-not {\tsel\t} } } */ +REDUC_PTR (int8_t, int8_t) +REDUC_PTR (int16_t, int16_t) + +REDUC_PTR (int32_t, int32_t) +REDUC_PTR (int64_t, int64_t) + +REDUC_PTR (_Float16, _Float16) +REDUC_PTR (float, float) +REDUC_PTR (double, double) + +/* Widening reductions. 
*/ +REDUC_PTR (int32_t, int8_t) +REDUC_PTR (int32_t, int16_t) + +REDUC_PTR (int64_t, int8_t) +REDUC_PTR (int64_t, int16_t) +REDUC_PTR (int64_t, int32_t) + +REDUC_PTR (float, _Float16) +REDUC_PTR (double, float) + +/* Float<>Int conversions */ +REDUC_PTR (_Float16, int16_t) +REDUC_PTR (float, int32_t) +REDUC_PTR (double, int64_t) + +REDUC_PTR (int16_t, _Float16) +REDUC_PTR (int32_t, float) +REDUC_PTR (int64_t, double) + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c index 2ba09b14851..9e997adedca 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c @@ -1,47 +1,18 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-inline -march=armv8-a+sve" } */ - -#include <stdint.h> - -#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) - -#define REDUC_PTR(DSTTYPE, SRCTYPE) \ -void reduc_ptr_##DSTTYPE##_##SRCTYPE (DSTTYPE *restrict sum, \ - SRCTYPE *restrict array, \ - int count) \ -{ \ - *sum = 0; \ - for (int i = 0; i < count; ++i) \ - *sum += array[i]; \ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +double +f (double *restrict a, double *restrict b, int *lookup) +{ + double res = 0.0; + for (int i = 0; i < 512; ++i) + res += a[lookup[i]] * b[i]; + return res; } -REDUC_PTR (int8_t, int8_t) -REDUC_PTR (int16_t, int16_t) - -REDUC_PTR (int32_t, int32_t) -REDUC_PTR (int64_t, int64_t) - -REDUC_PTR (float, float) -REDUC_PTR (double, double) - -/* Widening reductions. 
*/ -REDUC_PTR (int32_t, int8_t) -REDUC_PTR (int32_t, int16_t) - -REDUC_PTR (int64_t, int8_t) -REDUC_PTR (int64_t, int16_t) -REDUC_PTR (int64_t, int32_t) - -REDUC_PTR (double, float) - -/* Float<>Int conversions */ -REDUC_PTR (float, int32_t) -REDUC_PTR (double, int64_t) - -REDUC_PTR (int32_t, float) -REDUC_PTR (int64_t, double) - -/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+.d, p[0-7]/m, } 2 } } */ +/* Check that the vector instructions are the only instructions. */ +/* { dg-final { scan-assembler-times {\tfmla\t} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\t} } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td0,} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c index 2b8c6e523ca..9307200fb05 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c @@ -1,7 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c index aaa08dc03e2..fb238373c4e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c @@ -1,7 +1,10 @@ /* { dg-do assemble } */ /* { dg-options "-O -march=armv8-a+sve 
-msve-vector-bits=256 --save-temps" } */ -typedef unsigned short v16hi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef uint16_t v16hi __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) @@ -21,11 +24,13 @@ typedef unsigned short v16hi __attribute__((vector_size (32))); #define TEST_ALL(T) \ T (v16hi, 16, 2) \ - T (v16hi, 16, 4) + T (v16hi, 16, 4) \ + T (v16hf, 16, 2) \ + T (v16hf, 16, 4) TEST_ALL (PERMUTE) /* { dg-final { scan-assembler-not {\ttbl\t} } } */ -/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */ -/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c index ac7ef0ef267..4834e2c2b01 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c @@ -1,7 +1,9 @@ /* { dg-do assemble } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef unsigned int v8si __attribute__((vector_size (32))); +#include <stdint.h> + +typedef uint32_t v8si __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c index 24aa47488ef..b19cd7a3161 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c @@ -1,77 +1,84 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O2 
-ftree-vectorize -march=armv8-a+sve --save-temps" } */ -#define DO_REG_OPS(TYPE) \ -void ashiftr_##TYPE (signed TYPE* dst, signed TYPE src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] >> src; \ -} \ -void lshiftr_##TYPE (unsigned TYPE* dst, unsigned TYPE src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] >> src; \ -} \ -void lshiftl_##TYPE (unsigned TYPE* dst, unsigned TYPE src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] << src; \ -} \ -void vashiftr_##TYPE (signed TYPE* dst, signed TYPE* src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] >> src[i]; \ -} \ -void vlshiftr_##TYPE (unsigned TYPE* dst, unsigned TYPE* src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] >> src[i]; \ -} \ -void vlshiftl_##TYPE (unsigned TYPE* dst, unsigned TYPE* src, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] << src[i]; \ +#include <stdint.h> + +#define DO_REG_OPS(TYPE) \ +void ashiftr_##TYPE (TYPE *dst, TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src; \ +} \ +void lshiftr_##TYPE (u##TYPE *dst, u##TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src; \ +} \ +void lshiftl_##TYPE (u##TYPE *dst, u##TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << src; \ +} \ +void vashiftr_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src[i]; \ +} \ +void vlshiftr_##TYPE (u##TYPE *dst, u##TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src[i]; \ +} \ +void vlshiftl_##TYPE (u##TYPE *dst, u##TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << src[i]; \ } -#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ -void vashiftr_imm_##NAME##_##TYPE (signed TYPE* dst, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - 
dst[i] = dst[i] >> VALUE; \ -} \ -void vlshiftr_imm_##NAME##_##TYPE (unsigned TYPE* dst, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] >> VALUE; \ -} \ -void vlshiftl_imm_##NAME##_##TYPE (unsigned TYPE* dst, int count) \ -{ \ - for (int i = 0; i < count; ++i) \ - dst[i] = dst[i] << VALUE; \ +#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ +void vashiftr_imm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> VALUE; \ +} \ +void vlshiftr_imm_##NAME##_##TYPE (u##TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> VALUE; \ +} \ +void vlshiftl_imm_##NAME##_##TYPE (u##TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << VALUE; \ } -DO_REG_OPS (int); +DO_REG_OPS (int32_t); +DO_REG_OPS (int64_t); -DO_IMMEDIATE_OPS (0, char, 0); -DO_IMMEDIATE_OPS (5, char, 5); -DO_IMMEDIATE_OPS (7, char, 7); +DO_IMMEDIATE_OPS (0, int8_t, 0); +DO_IMMEDIATE_OPS (5, int8_t, 5); +DO_IMMEDIATE_OPS (7, int8_t, 7); -DO_IMMEDIATE_OPS (0, short, 0); -DO_IMMEDIATE_OPS (5, short, 5); -DO_IMMEDIATE_OPS (15, short, 15); +DO_IMMEDIATE_OPS (0, int16_t, 0); +DO_IMMEDIATE_OPS (5, int16_t, 5); +DO_IMMEDIATE_OPS (15, int16_t, 15); -DO_IMMEDIATE_OPS (0, int, 0); -DO_IMMEDIATE_OPS (5, int, 5); -DO_IMMEDIATE_OPS (31, int, 31); +DO_IMMEDIATE_OPS (0, int32_t, 0); +DO_IMMEDIATE_OPS (5, int32_t, 5); +DO_IMMEDIATE_OPS (31, int32_t, 31); -DO_IMMEDIATE_OPS (0, long, 0); -DO_IMMEDIATE_OPS (5, long, 5); -DO_IMMEDIATE_OPS (63, long, 63); +DO_IMMEDIATE_OPS (0, int64_t, 0); +DO_IMMEDIATE_OPS (5, int64_t, 5); +DO_IMMEDIATE_OPS (63, int64_t, 63); /* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tasr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_single_1.c index e50d7064858..f7aeed06907 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_1.c @@ -5,25 +5,28 @@ #define N 32 #endif -#define TEST_LOOP(NAME, TYPE, VALUE) \ +#include <stdint.h> + +#define TEST_LOOP(TYPE, VALUE) \ void \ - NAME (TYPE *data) \ + test_##TYPE (TYPE *data) \ { \ _Pragma ("omp simd") \ for (int i = 0; i < N / sizeof (TYPE); ++i) \ data[i] = VALUE; \ } -TEST_LOOP (uc, unsigned char, 1) -TEST_LOOP (sc, signed char, 2) -TEST_LOOP (us, unsigned short, 3) -TEST_LOOP (ss, signed short, 4) -TEST_LOOP (ui, unsigned int, 5) -TEST_LOOP (si, signed int, 6) -TEST_LOOP (ul, unsigned long, 7) -TEST_LOOP (sl, signed long, 8) -TEST_LOOP (f, float, 1.0f) -TEST_LOOP (d, double, 2.0) +TEST_LOOP (uint8_t, 1) +TEST_LOOP (int8_t, 2) +TEST_LOOP (uint16_t, 3) +TEST_LOOP (int16_t, 4) +TEST_LOOP (uint32_t, 5) +TEST_LOOP (int32_t, 6) +TEST_LOOP (uint64_t, 7) +TEST_LOOP (int64_t, 8) +TEST_LOOP (_Float16, 1.0f) +TEST_LOOP (float, 2.0f) +TEST_LOOP (double, 3.0) /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */ @@ -33,16 +36,17 @@ TEST_LOOP (d, double, 2.0) /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ /* 
{ dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_2.c b/gcc/testsuite/gcc.target/aarch64/sve_single_2.c index e167782323b..7daea6262d6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_single_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_2.c @@ -12,16 +12,17 @@ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ 
+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_3.c b/gcc/testsuite/gcc.target/aarch64/sve_single_3.c index 8967586bf50..e779d6c50d9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_single_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_3.c @@ -12,16 +12,17 @@ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */ /* 
{ dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_4.c b/gcc/testsuite/gcc.target/aarch64/sve_single_4.c index 99e2284b164..7c8b3015551 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_single_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_4.c @@ -12,16 +12,17 @@ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */ /* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ /* { dg-final { scan-assembler-times 
{\tst1d\tz[0-9]+\.d,} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c index 3fa4f187fa4..3e7367cd9fa 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c @@ -1,53 +1,55 @@ /* { dg-do assemble } */ /* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ -typedef long v4di __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef char v32qi __attribute__((vector_size(32))); +#include <stdint.h> -void sve_store_64_z_lsl (unsigned long *a, unsigned long i) +typedef int64_t v4di __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int8_t v32qi __attribute__((vector_size(32))); + +void sve_store_64_z_lsl (uint64_t *a, unsigned long i) { asm volatile ("" : "=w" (*(v4di *) &a[i])); } -void sve_store_64_s_lsl (signed long *a, signed long i) +void sve_store_64_s_lsl (int64_t *a, signed long i) { asm volatile ("" : "=w" (*(v4di *) &a[i])); } -void sve_store_32_z_lsl (unsigned int *a, unsigned long i) +void sve_store_32_z_lsl (uint32_t *a, unsigned long i) { asm volatile ("" : "=w" (*(v8si *) &a[i])); } -void sve_store_32_s_lsl (signed int *a, signed long i) +void sve_store_32_s_lsl (int32_t *a, signed long i) { asm volatile ("" : "=w" (*(v8si *) &a[i])); } -void sve_store_16_z_lsl (unsigned short *a, unsigned long i) +void sve_store_16_z_lsl (uint16_t *a, unsigned long i) { asm volatile ("" : "=w" (*(v16hi *) &a[i])); } -void sve_store_16_s_lsl (signed short *a, signed long i) +void sve_store_16_s_lsl (int16_t *a, signed long i) { asm volatile ("" : "=w" (*(v16hi *) &a[i])); } /* ??? The other argument order leads to a redundant move. 
*/ -void sve_store_8_z (unsigned long i, unsigned char *a) +void sve_store_8_z (unsigned long i, uint8_t *a) { asm volatile ("" : "=w" (*(v32qi *) &a[i])); } -void sve_store_8_s (signed long i, signed char *a) +void sve_store_8_s (signed long i, int8_t *a) { asm volatile ("" : "=w" (*(v32qi *) &a[i])); } -/* { dg-final { scan-assembler-times {\tst1d\tz0.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz0.s, p[0-7], \[x0, x1, lsl 2\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz0.h, p[0-7], \[x0, x1, lsl 1\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz0.b, p[0-7], \[x1, x0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz0\.s, p[0-7], \[x0, x1, lsl 2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz0\.h, p[0-7], \[x0, x1, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz0\.b, p[0-7], \[x1, x0\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_2.c deleted file mode 100644 index 586e9726396..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_2.c +++ /dev/null @@ -1,53 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ - -typedef long v4di __attribute__((vector_size(32))); -typedef int v8si __attribute__((vector_size(32))); -typedef short v16hi __attribute__((vector_size(32))); -typedef char v32qi __attribute__((vector_size(32))); - -void sve_store_64_z_lsl (unsigned long *a, unsigned long i) -{ - asm volatile ("" : "=w" (*(v4di *)&a[i])); -} - -void sve_store_64_s_lsl (signed long *a, signed long i) -{ - asm volatile ("" : "=w" (*(v4di *)&a[i])); -} - -void sve_store_32_z_lsl (unsigned int *a, unsigned long i) -{ - asm volatile ("" : "=w" (*(v8si *)&a[i])); -} - -void 
sve_store_32_s_lsl (signed int *a, signed long i) -{ - asm volatile ("" : "=w" (*(v8si *)&a[i])); -} - -void sve_store_16_z_lsl (unsigned short *a, unsigned long i) -{ - asm volatile ("" : "=w" (*(v16hi *)&a[i])); -} - -void sve_store_16_s_lsl (signed short *a, signed long i) -{ - asm volatile ("" : "=w" (*(v16hi *)&a[i])); -} - -/* ??? The other argument order leads to a redundant move. */ -void sve_store_8_z (unsigned long i, unsigned char *a) -{ - asm volatile ("" : "=w" (*(v32qi *)&a[i])); -} - -void sve_store_8_s (signed long i, signed char *a) -{ - asm volatile ("" : "=w" (*(v32qi *)&a[i])); -} - -/* { dg-final { scan-assembler-times "st1d\\tz0.d, p\[0-9\]+, \\\[x0, x1, lsl 3\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "st1w\\tz0.s, p\[0-9\]+, \\\[x0, x1, lsl 2\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "st1h\\tz0.h, p\[0-9\]+, \\\[x0, x1, lsl 1\\\]" 2 } } */ -/* { dg-final { scan-assembler-times "st1b\\tz0.b, p\[0-9\]+, \\\[x1, x0\\\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c index de4dbe8c6cc..1d8dc76719d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c @@ -1,28 +1,32 @@ /* { dg-do assemble } */ -/* { dg-options "-std=c99 -O3 -march=armv8-a+sve --save-temps" } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> #define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ -void vsubrarithimm_##NAME##_##TYPE (TYPE* dst, int count) \ +void vsubr_arithimm_##NAME##_##TYPE (TYPE *dst, int count) \ { \ for (int i = 0; i < count; ++i) \ dst[i] = VALUE - dst[i]; \ } #define DO_ARITH_OPS(TYPE) \ -DO_IMMEDIATE_OPS (0, TYPE, 0); \ -DO_IMMEDIATE_OPS (5, TYPE, 5); \ -DO_IMMEDIATE_OPS (255, TYPE, 255); \ -DO_IMMEDIATE_OPS (256, TYPE, 256); \ -DO_IMMEDIATE_OPS (257, TYPE, 257); \ -DO_IMMEDIATE_OPS (65280, TYPE, 65280); \ -DO_IMMEDIATE_OPS (65281, TYPE, 65281); \ -DO_IMMEDIATE_OPS (-1, TYPE, minus1); - 
-DO_ARITH_OPS (char) -DO_ARITH_OPS (int) -DO_ARITH_OPS (long) + DO_IMMEDIATE_OPS (0, TYPE, 0); \ + DO_IMMEDIATE_OPS (5, TYPE, 5); \ + DO_IMMEDIATE_OPS (255, TYPE, 255); \ + DO_IMMEDIATE_OPS (256, TYPE, 256); \ + DO_IMMEDIATE_OPS (257, TYPE, 257); \ + DO_IMMEDIATE_OPS (65280, TYPE, 65280); \ + DO_IMMEDIATE_OPS (65281, TYPE, 65281); \ + DO_IMMEDIATE_OPS (-1, TYPE, minus1); + +DO_ARITH_OPS (int8_t) +DO_ARITH_OPS (int16_t) +DO_ARITH_OPS (int32_t) +DO_ARITH_OPS (int64_t) /* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ @@ -35,6 +39,14 @@ DO_ARITH_OPS (long) /* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */ /* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ + /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c index c82f30e9578..0c7b887d232 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c @@ -5,12 +5,15 @@ #define BIAS 0 #endif -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_2(X, Y) X, Y + X #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) @@ -37,7 +40,8 @@ typedef float v8sf __attribute__((vector_size (32))); T (v16hi, 16) \ T (v32qi, 32) \ T (v4df, 4) \ - T (v8sf, 8) + T (v8sf, 8) \ + T (v16hf, 16) TEST_ALL (PERMUTE) @@ -45,5 +49,5 @@ TEST_ALL (PERMUTE) /* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ /* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ -/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c index a4b3ea40a21..6654781bbd5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c @@ -8,5 +8,5 @@ /* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ /* { dg-final 
{ scan-assembler-times {\ttrn2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ -/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c index de010318fd1..c415c4bf5d1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c @@ -1,7 +1,10 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -void unpack_double_int_plus8 (double *d, signed int *s, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +unpack_double_int_plus8 (double *d, int32_t *s, int size) { for (int i = 0; i < size; i++) d[i] = s[i] + 8; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c index 083f1b346d3..f8d9cc2b2ca 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c @@ -1,9 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_unpack_fcvt_signed_1.c" @@ -15,19 +11,18 @@ int __attribute__ ((optimize (1))) main (void) { double array_dest[ARRAY_SIZE]; - signed int array_source[ARRAY_SIZE]; + int32_t array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } unpack_double_int_plus8 (array_dest, array_source, 
ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) - if (array_dest[i] != (float) (VAL1 + 8)) - { - fprintf (stderr,"%d: %f != %f\n", i, array_dest[i], - (float) (VAL1 + 8)); - exit (1); - } + if (array_dest[i] != (double) (VAL1 + 8)) + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c index cc1b5e576f4..fb9fe810cf9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c @@ -1,7 +1,10 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -void unpack_double_int_plus9 (double *d, unsigned int *s, int size) +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +unpack_double_int_plus9 (double *d, uint32_t *s, int size) { for (int i = 0; i < size; i++) d[i] = (double) (s[i] + 9); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c index 1c31e18c410..93788a342ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c @@ -1,9 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ - -#include <string.h> -#include <stdio.h> -#include <stdlib.h> +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include "sve_unpack_fcvt_unsigned_1.c" @@ -15,19 +11,18 @@ int __attribute__ ((optimize (1))) main (void) { double array_dest[ARRAY_SIZE]; - unsigned int array_source[ARRAY_SIZE]; + uint32_t array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } unpack_double_int_plus9 (array_dest, 
array_source, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) if (array_dest[i] != (double) (VAL1 + 9)) - { - fprintf (stderr,"%d: %lf != %lf\n", i, array_dest[i], - (double) (VAL1 + 9)); - exit (1); - } + __builtin_abort (); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c index 86bd60918e2..73c7a815e36 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c @@ -1,7 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -void unpack_float_plus_7point9 (double *d, float *s, int size) +void __attribute__ ((noinline, noclone)) +unpack_float_plus_7point9 (double *d, float *s, int size) { for (int i = 0; i < size; i++) d[i] = s[i] + 7.9; diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c index 4e280dd10f9..2a645b33d4b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c @@ -1,10 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -#include <string.h> -#include <stdio.h> -#include <stdlib.h> - #include "sve_unpack_float_1.c" #define ARRAY_SIZE 199 @@ -18,16 +14,15 @@ main (void) float array_source[ARRAY_SIZE]; for (int i = 0; i < ARRAY_SIZE; i++) - array_source[i] = VAL1; + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } unpack_float_plus_7point9 (array_dest, array_source, ARRAY_SIZE); for (int i = 0; i < ARRAY_SIZE; i++) if (array_dest[i] != (double) (VAL1 + 7.9)) - { - fprintf (stderr,"%d: %f != %f\n", i, array_dest[i], - (double) (VAL1 + 7.9)); - exit (1); - } + __builtin_abort (); return 0; } diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c index e4d0393b047..4d345cf81e9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c @@ -1,20 +1,25 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -#define UNPACK(TYPED, TYPES, SIGND) \ -void unpack_##TYPED##_##TYPES##_##SIGND (SIGND TYPED *d, signed TYPES *s, \ - int size) \ -{ \ - for (int i = 0; i < size; i++) \ - d[i] = s[i] + 1; \ +#include <stdint.h> + +#define UNPACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ } -UNPACK (long, int, signed) -UNPACK (int, short, signed) -UNPACK (short, char, signed) -UNPACK (long, int, unsigned) -UNPACK (int, short, unsigned) -UNPACK (short, char, unsigned) +#define TEST_ALL(T) \ + T (int64_t, int32_t) \ + T (int32_t, int16_t) \ + T (int16_t, int8_t) \ + T (uint64_t, int32_t) \ + T (uint32_t, int16_t) \ + T (uint16_t, int8_t) + +TEST_ALL (UNPACK) /* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c index b63aa5b7d1d..d183408d124 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c @@ -1,46 +1,28 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -#include <string.h> -#include <stdio.h> -#include <stdlib.h> - #include "sve_unpack_signed_1.c" #define ARRAY_SIZE 33 -#define RUN_AND_CHECK_LOOP(TYPED, TYPES, VALUED, VALUES) \ -{ \ - int value = 0; \ - TYPED 
arrayd[ARRAY_SIZE]; \ - TYPES arrays[ARRAY_SIZE]; \ - memset (arrayd, 67, ARRAY_SIZE * sizeof (TYPED)); \ - memset (arrays, VALUES, ARRAY_SIZE * sizeof (TYPES)); \ - unpack_##TYPED##_##TYPES##_signed (arrayd, arrays, ARRAY_SIZE); \ - for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ - memset (arrayd, 74, ARRAY_SIZE * sizeof (TYPED)); \ - unpack_##TYPED##_##TYPES##_unsigned (arrayd, arrays, ARRAY_SIZE); \ - for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ -} +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } -int main (void) +int __attribute__ ((optimize (1))) +main (void) { - int total = 5; - RUN_AND_CHECK_LOOP (short, char, total+1, total); - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (int, short, total+1, total); - total = (total << 8) + 5; - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (long, int, total+1, total); + TEST_ALL (TEST_LOOP) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c index 94192d977f4..fa8de963264 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c @@ -1,20 +1,25 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -#define UNPACK(TYPED, TYPES, SIGND) \ -void unpack_##TYPED##_##TYPES##_##SIGND (SIGND TYPED *d, unsigned TYPES *s, \ - int size) \ -{ \ - for (int i = 
0; i < size; i++) \ - d[i] = s[i] + 1; \ +#include <stdint.h> + +#define UNPACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ } -UNPACK (long, int, signed) \ -UNPACK (int, short, signed) \ -UNPACK (short, char, signed) \ -UNPACK (long, int, unsigned) \ -UNPACK (int, short, unsigned) \ -UNPACK (short, char, unsigned) +#define TEST_ALL(T) \ + T (int64_t, uint32_t) \ + T (int32_t, uint16_t) \ + T (int16_t, uint8_t) \ + T (uint64_t, uint32_t) \ + T (uint32_t, uint16_t) \ + T (uint16_t, uint8_t) + +TEST_ALL (UNPACK) /* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c index 33f5f939c84..3fa66220f17 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c @@ -1,46 +1,28 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ -#include <string.h> -#include <stdio.h> -#include <stdlib.h> - #include "sve_unpack_unsigned_1.c" #define ARRAY_SIZE 85 -#define RUN_AND_CHECK_LOOP(TYPED, TYPES, VALUED, VALUES) \ -{ \ - int value = 0; \ - TYPED arrayd[ARRAY_SIZE]; \ - TYPES arrays[ARRAY_SIZE]; \ - memset (arrayd, 67, ARRAY_SIZE * sizeof (TYPED)); \ - memset (arrays, VALUES, ARRAY_SIZE * sizeof (TYPES)); \ - unpack_##TYPED##_##TYPES##_signed (arrayd, arrays, ARRAY_SIZE); \ - for (int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ - memset (arrayd, 74, ARRAY_SIZE * sizeof (TYPED)); \ - unpack_##TYPED##_##TYPES##_unsigned (arrayd, arrays, ARRAY_SIZE); \ - for 
(int i = 0; i < ARRAY_SIZE; i++) \ - if (arrayd[i] != VALUED) \ - { \ - fprintf (stderr,"%d: %d != %d\n", i, arrayd[i], VALUED); \ - exit (1); \ - } \ -} +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } -int main (void) +int __attribute__ ((optimize (1))) +main (void) { - int total = 5; - RUN_AND_CHECK_LOOP (short, char, total + 1, total); - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (int, short, total + 1, total); - total = (total << 8) + 5; - total = (total << 8) + 5; - RUN_AND_CHECK_LOOP (long, int, total + 1, total); + TEST_ALL (TEST_LOOP) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c index 22fc84f066c..aaa4fdccbf0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define UZP1(TYPE, MASK) \ TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \ @@ 
-25,6 +28,8 @@ UZP1 (v32qi, ((v32qi) { 0, 2, 4, 6, 8, 10, 12, 14, 48, 50, 52, 54, 56, 58, 60, 62 })); UZP1 (v4df, ((v4di) { 0, 2, 4, 6 })); UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); +UZP1 (v16hf, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 })); /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ @@ -33,5 +38,5 @@ UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); /* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c index 338670c03af..d35dad0ffca 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c @@ -2,7 +2,6 @@ /* { dg-options "-O -march=armv8-a+sve" } */ #include "sve_uzp1_1.c" -extern void abort (void); #define TEST_UZP1(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ { \ @@ -12,7 +11,7 @@ extern void abort (void); TYPE dest; \ dest = uzp1_##TYPE (values1, values2); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -53,5 +52,12 @@ int main (void) ((v8sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }), ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); + TEST_UZP1 (v16hf, + ((v16hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, + 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 
}), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c index 39c8ff43368..1bb84d80eb0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define UZP2(TYPE, MASK) \ TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \ @@ -24,6 +27,8 @@ UZP2 (v32qi, ((v32qi) { 1, 3, 5, 7, 9, 11, 13, 15, 49, 51, 53, 55, 57, 59, 61, 63 })); UZP2 (v4df, ((v4di) { 1, 3, 5, 7 })); UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); +UZP2 (v16hf, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 })); /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ /* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ @@ -32,5 +37,5 @@ UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); /* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.s, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c index b9b8cccfafe..d7a241c1258 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c @@ -2,7 +2,6 @@ /* { dg-options "-O -march=armv8-a+sve" } */ #include "sve_uzp2_1.c" -extern void abort (void); #define TEST_UZP2(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ { \ @@ -12,7 +11,7 @@ extern void abort (void); TYPE dest; \ dest = uzp2_##TYPE (values1, values2); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -53,5 +52,12 @@ int main (void) ((v8sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }), ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); + TEST_UZP2 (v16hf, + ((v16hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, + 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C index 48ad92d0ab7..9be09546c80 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C @@ -1,5 +1,5 @@ -/* { dg-do compile { target { ! 
*-*-* } } } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ #include <stdint.h> @@ -13,231 +13,233 @@ typedef uint16_t v16hu __attribute__((vector_size(32))); typedef uint32_t v8su __attribute__((vector_size(32))); typedef uint64_t v4du __attribute__((vector_size(32))); -#define NUM_ELEMS(TYPE) (sizeof (r_##TYPE) / sizeof (r_##TYPE[0])) - -#define DEF_VCOND(TYPE,COND,SUFFIX) \ -TYPE vcond_##TYPE##SUFFIX (TYPE x, TYPE y, TYPE a, TYPE b) \ +#define DEF_VCOND_VAR(TYPE, COND, SUFFIX) \ +TYPE vcond_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a, TYPE b) \ { \ TYPE r; \ r = a COND b ? x : y; \ return r; \ } -#define DEF_VCOND_IMM(TYPE,COND,IMM,SUFFIX) \ -TYPE vcond_imm_##TYPE##SUFFIX (TYPE x, TYPE y, TYPE a) \ -{ \ - TYPE r; \ - r = a COND IMM ? x : y; \ - return r; \ +#define DEF_VCOND_IMM(TYPE, COND, IMM, SUFFIX) \ +TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \ +{ \ + TYPE r; \ + r = a COND IMM ? 
x : y; \ + return r; \ } -#define DEF_VCOND_SIGNED_ALL(COND,SUFFIX) \ -DEF_VCOND (v32qi,COND,SUFFIX) \ -DEF_VCOND (v16hi,COND,SUFFIX) \ -DEF_VCOND (v8si,COND,SUFFIX) \ -DEF_VCOND (v4di,COND,SUFFIX) - -#define DEF_VCOND_UNSIGNED_ALL(COND,SUFFIX) \ -DEF_VCOND (v32qu,COND,SUFFIX) \ -DEF_VCOND (v16hu,COND,SUFFIX) \ -DEF_VCOND (v8su,COND,SUFFIX) \ -DEF_VCOND (v4du,COND,SUFFIX) - -#define DEF_VCOND_ALL(COND,SUFFIX) \ -DEF_VCOND_SIGNED_ALL (COND,SUFFIX) \ -DEF_VCOND_UNSIGNED_ALL (COND,SUFFIX) - -#define DEF_VCOND_IMM_SIGNED_ALL(COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v32qi,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v16hi,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v8si,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v4di,COND,IMM,SUFFIX) - -#define DEF_VCOND_IMM_UNSIGNED_ALL(COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v32qu,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v16hu,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v8su,COND,IMM,SUFFIX) \ -DEF_VCOND_IMM (v4du,COND,IMM,SUFFIX) - -#define DEF_VCOND_IMM_ALL(COND,IMM,SUFFIX) \ -DEF_VCOND_IMM_SIGNED_ALL (COND,IMM,SUFFIX) \ -DEF_VCOND_IMM_UNSIGNED_ALL (COND,IMM,SUFFIX) - -DEF_VCOND_ALL (>, _gt) -DEF_VCOND_ALL (<, _lt) -DEF_VCOND_ALL (>=, _ge) -DEF_VCOND_ALL (<=, _le) -DEF_VCOND_ALL (==, _eq) -DEF_VCOND_ALL (!=, _ne) - -/* == Expect immediates to make it into the encoding == */ - -DEF_VCOND_IMM_ALL (>, 5, _gt) -DEF_VCOND_IMM_ALL (<, 5, _lt) -DEF_VCOND_IMM_ALL (>=, 5, _ge) -DEF_VCOND_IMM_ALL (<=, 5, _le) -DEF_VCOND_IMM_ALL (==, 5, _eq) -DEF_VCOND_IMM_ALL (!=, 5, _ne) - -DEF_VCOND_IMM_SIGNED_ALL (>, 15, _gt2) -DEF_VCOND_IMM_SIGNED_ALL (<, 15, _lt2) -DEF_VCOND_IMM_SIGNED_ALL (>=, 15, _ge2) -DEF_VCOND_IMM_SIGNED_ALL (<=, 15, _le2) -DEF_VCOND_IMM_SIGNED_ALL (==, 15, _eq2) -DEF_VCOND_IMM_SIGNED_ALL (!=, 15, _ne2) - -DEF_VCOND_IMM_SIGNED_ALL (>, -16, _gt3) -DEF_VCOND_IMM_SIGNED_ALL (<, -16, _lt3) -DEF_VCOND_IMM_SIGNED_ALL (>=, -16, _ge3) -DEF_VCOND_IMM_SIGNED_ALL (<=, -16, _le3) -DEF_VCOND_IMM_SIGNED_ALL (==, -16, _eq3) -DEF_VCOND_IMM_SIGNED_ALL (!=, -16, _ne3) - -DEF_VCOND_IMM_UNSIGNED_ALL (>, 
0, _gt4) -/* Testing if an unsigned value >= 0 or < 0 is pointless as it will - get folded away by the compiler. */ -DEF_VCOND_IMM_UNSIGNED_ALL (<=, 0, _le4) - -DEF_VCOND_IMM_UNSIGNED_ALL (>, 31, _gt5) -DEF_VCOND_IMM_UNSIGNED_ALL (<, 31, _lt5) -DEF_VCOND_IMM_UNSIGNED_ALL (>=, 31, _ge5) -DEF_VCOND_IMM_UNSIGNED_ALL (<=, 31, _le5) - -/* Expect immediates to NOT make it into the encoding, and instead be - forced into a register. == */ -DEF_VCOND_IMM_ALL (>, 32, _gt6) -DEF_VCOND_IMM_ALL (<, 32, _lt6) -DEF_VCOND_IMM_ALL (>=, 32, _ge6) -DEF_VCOND_IMM_ALL (<=, 32, _le6) -DEF_VCOND_IMM_ALL (==, 32, _eq6) -DEF_VCOND_IMM_ALL (!=, 32, _ne6) - -/* { dg-final { scan-assembler {\tsel\tz[0-9]+.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tsel\tz[0-9]+.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tsel\tz[0-9]+.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tsel\tz[0-9]+.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler 
{\tcmphs\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ - - - -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].d, p[0-7]/z, 
z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmple\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ - -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { 
scan-assembler {\tcmplt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmplt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - -/* { dg-final { scan-assembler {\tcmple\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmple\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ -/* { dg-final { scan-assembler {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ - - - -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ - -/* { dg-final { scan-assembler 
{\tcmpls\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ - - -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ - -/* { dg-final { scan-assembler {\tcmplo\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmplo\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmplo\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmplo\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ - -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmphs\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ - -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ -/* { dg-final { scan-assembler {\tcmpls\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ + T (v32qi, COND, SUFFIX) \ + T (v16hi, COND, SUFFIX) \ + T (v8si, COND, SUFFIX) \ + T (v4di, COND, SUFFIX) + +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ + T (v32qu, COND, SUFFIX) \ + T (v16hu, COND, 
SUFFIX) \ + T (v8su, COND, SUFFIX) \ + T (v4du, COND, SUFFIX) + +#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ + TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ + TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) + +#define TEST_VAR_ALL(T) \ + TEST_COND_VAR_ALL (T, >, gt) \ + TEST_COND_VAR_ALL (T, <, lt) \ + TEST_COND_VAR_ALL (T, >=, ge) \ + TEST_COND_VAR_ALL (T, <=, le) \ + TEST_COND_VAR_ALL (T, ==, eq) \ + TEST_COND_VAR_ALL (T, !=, ne) + +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (v32qi, COND, IMM, SUFFIX) \ + T (v16hi, COND, IMM, SUFFIX) \ + T (v8si, COND, IMM, SUFFIX) \ + T (v4di, COND, IMM, SUFFIX) + +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (v32qu, COND, IMM, SUFFIX) \ + T (v16hu, COND, IMM, SUFFIX) \ + T (v8su, COND, IMM, SUFFIX) \ + T (v4du, COND, IMM, SUFFIX) + +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX) + +#define TEST_IMM_ALL(T) \ + /* Expect immediates to make it into the encoding. */ \ + TEST_COND_IMM_ALL (T, >, 5, gt) \ + TEST_COND_IMM_ALL (T, <, 5, lt) \ + TEST_COND_IMM_ALL (T, >=, 5, ge) \ + TEST_COND_IMM_ALL (T, <=, 5, le) \ + TEST_COND_IMM_ALL (T, ==, 5, eq) \ + TEST_COND_IMM_ALL (T, !=, 5, ne) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 15, gt2) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 15, lt2) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 15, ge2) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 15, le2) \ + TEST_COND_IMM_SIGNED_ALL (T, ==, 15, eq2) \ + TEST_COND_IMM_SIGNED_ALL (T, !=, 15, ne2) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -16, gt3) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -16, lt3) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -16, ge3) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -16, le3) \ + TEST_COND_IMM_SIGNED_ALL (T, ==, -16, eq3) \ + TEST_COND_IMM_SIGNED_ALL (T, !=, -16, ne3) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, gt4) \ + /* Testing if an unsigned value >= 0 or < 0 is pointless as it will \ + get folded away by the compiler. 
*/ \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, le4) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 31, gt5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 31, lt5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 31, ge5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 31, le5) \ + \ + /* Expect immediates to NOT make it into the encoding, and instead be \ + forced into a register. */ \ + TEST_COND_IMM_ALL (T, >, 32, gt6) \ + TEST_COND_IMM_ALL (T, <, 32, lt6) \ + TEST_COND_IMM_ALL (T, >=, 32, ge6) \ + TEST_COND_IMM_ALL (T, <=, 32, le6) \ + TEST_COND_IMM_ALL (T, ==, 32, eq6) \ + TEST_COND_IMM_ALL (T, !=, 32, ne6) + +TEST_VAR_ALL (DEF_VCOND_VAR) +TEST_IMM_ALL (DEF_VCOND_IMM) + +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { 
dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + + + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { 
scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler 
{\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + + + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ +/* { dg-final { scan-assembler 
{\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ + +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ + + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C index e2b1c62a667..42e09d94393 100644 
--- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C @@ -1,100 +1,46 @@ -/* { dg-do run { target { ! *-*-* } } } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_vcond_1.C" -#include <stdlib.h> +#define NUM_ELEMS(X) (sizeof (X) / sizeof (X[0])) -#define TEST_VCOND(TYPE,COND,SUFFIX) \ +#define TEST_VCOND_VAR(TYPE, COND, SUFFIX) \ { \ - TYPE x = { 1 }, y = { 2 }, a = { 3 }, b = { 4 }; \ - r_##TYPE += vcond_##TYPE##SUFFIX (x, y, a, b); \ + TYPE x, y, a, b; \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + { \ + a[i] = i - 2; \ + b[i] = NUM_ELEMS (x) - 2 - i; \ + x[i] = i * 2; \ + y[i] = -i * 3; \ + } \ + TYPE r = vcond_##TYPE##_##SUFFIX (x, y, a, b); \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + if (r[i] != (a[i] COND b[i] ? x[i] : y[i])) \ + __builtin_abort (); \ } -#define TEST_VCOND_IMM(TYPE,COND,IMM,SUFFIX) \ +#define TEST_VCOND_IMM(TYPE, COND, IMM, SUFFIX) \ { \ - TYPE x = { 1 }, y = { 2 }, a = { 3 }; \ - r_##TYPE += vcond_imm_##TYPE##SUFFIX (x, y, a); \ + TYPE x, y, a; \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + { \ + a[i] = IMM - 2 + i; \ + x[i] = i * 2; \ + y[i] = -i * 3; \ + } \ + TYPE r = vcond_imm_##TYPE##_##SUFFIX (x, y, a); \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + if (r[i] != (a[i] COND IMM ? 
x[i] : y[i])) \ + __builtin_abort (); \ } -#define TEST_VCOND_SIGNED_ALL(COND, SUFFIX) \ -TEST_VCOND (v32qi, COND, SUFFIX) \ -TEST_VCOND (v16hi, COND, SUFFIX) \ -TEST_VCOND (v8si, COND, SUFFIX) \ -TEST_VCOND (v4di, COND, SUFFIX) - -#define TEST_VCOND_UNSIGNED_ALL(COND, SUFFIX) \ -TEST_VCOND (v32qu, COND, SUFFIX) \ -TEST_VCOND (v16hu, COND, SUFFIX) \ -TEST_VCOND (v8su, COND, SUFFIX) \ -TEST_VCOND (v4du, COND, SUFFIX) - -#define TEST_VCOND_ALL(COND, SUFFIX) \ -TEST_VCOND_SIGNED_ALL (COND, SUFFIX) \ -TEST_VCOND_UNSIGNED_ALL(COND, SUFFIX) - -#define TEST_VCOND_IMM_SIGNED_ALL(COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v32qi, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v16hi, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v8si, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v4di, COND, IMM, SUFFIX) - -#define TEST_VCOND_IMM_UNSIGNED_ALL(COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v32qu, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v16hu, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v8su, COND, IMM, SUFFIX) \ -TEST_VCOND_IMM (v4du, COND, IMM, SUFFIX) - -#define TEST_VCOND_IMM_ALL(COND,IMM,SUFFIX) \ -TEST_VCOND_IMM_SIGNED_ALL (COND,IMM,SUFFIX) \ -TEST_VCOND_IMM_UNSIGNED_ALL (COND,IMM,SUFFIX) - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE r_##TYPE; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - r_##TYPE[i] = i * 3; - -#define SUM_VECTOR(VAL,TYPE) \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ - VAL += r_##TYPE[i]; - int main (int argc, char **argv) { - int result = 0; - DEF_INIT_VECTOR (v32qi) - DEF_INIT_VECTOR (v16hi) - DEF_INIT_VECTOR (v8si) - DEF_INIT_VECTOR (v4di) - DEF_INIT_VECTOR (v32qu) - DEF_INIT_VECTOR (v16hu) - DEF_INIT_VECTOR (v8su) - DEF_INIT_VECTOR (v4du) - - TEST_VCOND_ALL (>, _gt) - TEST_VCOND_ALL (<, _lt) - TEST_VCOND_ALL (>=, _ge) - TEST_VCOND_ALL (<=, _le) - TEST_VCOND_ALL (==, _eq) - TEST_VCOND_ALL (!=, _ne) - - TEST_VCOND_IMM_ALL (>, 5, _gt) - TEST_VCOND_IMM_ALL (<, 5, _lt) - TEST_VCOND_IMM_ALL (>=, 5, _ge) - TEST_VCOND_IMM_ALL (<=, 5, _le) - TEST_VCOND_IMM_ALL (==, 5, _eq) - TEST_VCOND_IMM_ALL (!=, 
5, _ne) - - SUM_VECTOR (result, v32qi) - SUM_VECTOR (result, v16hi) - SUM_VECTOR (result, v8si) - SUM_VECTOR (result, v4di) - SUM_VECTOR (result, v32qu) - SUM_VECTOR (result, v16hu) - SUM_VECTOR (result, v8su) - SUM_VECTOR (result, v4du) - - if (result != 4044) - abort (); + TEST_VAR_ALL (TEST_VCOND_VAR) + TEST_IMM_ALL (TEST_VCOND_IMM) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.C deleted file mode 100644 index 80299d7e4b8..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.C +++ /dev/null @@ -1,310 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -march=armv8-a+sve -fno-inline -fno-ipa-icf" } */ - -#include <stdint.h> - -#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) - -#define DEF_VCOND(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ - void vcond_##CMP_TYPE##SUFFIX (DATA_TYPE *__restrict__ r, \ - DATA_TYPE *__restrict__ a, \ - DATA_TYPE *__restrict__ b, \ - CMP_TYPE *__restrict__ x, \ - CMP_TYPE *__restrict__ y, \ - int n) \ - { \ - for (int i = 0; i < n; i++) \ - { \ - CMP_TYPE yval = y[i], xval = x[i]; \ - DATA_TYPE aval = a[i], bval = b[i]; \ - r[i] = xval COND yval ? aval : bval; \ - } \ - } - -#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ - void vcond_imm_##CMP_TYPE##SUFFIX (DATA_TYPE *__restrict__ r, \ - DATA_TYPE *__restrict__ a, \ - DATA_TYPE *__restrict__ b, \ - CMP_TYPE *__restrict__ x, \ - int n) \ - { \ - for (int i = 0; i < n; i++) \ - { \ - CMP_TYPE xval = x[i]; \ - DATA_TYPE aval = a[i], bval = b[i]; \ - r[i] = xval COND (CMP_TYPE) IMM ? 
aval : bval; \ - } \ - } - -#define DEF_VCOND_SIGNED_ALL(COND, SUFFIX) \ - DEF_VCOND (int8_t, int8_t, COND, SUFFIX) \ - DEF_VCOND (int16_t, int16_t, COND, SUFFIX) \ - DEF_VCOND (int32_t, int32_t, COND, SUFFIX) \ - DEF_VCOND (int64_t, int64_t, COND, SUFFIX) \ - DEF_VCOND (float, int32_t, COND, SUFFIX##_float) \ - DEF_VCOND (double, int64_t, COND, SUFFIX##_double) - -#define DEF_VCOND_UNSIGNED_ALL(COND, SUFFIX) \ - DEF_VCOND (uint8_t, uint8_t, COND, SUFFIX) \ - DEF_VCOND (uint16_t, uint16_t, COND, SUFFIX) \ - DEF_VCOND (uint32_t, uint32_t, COND, SUFFIX) \ - DEF_VCOND (uint64_t, uint64_t, COND, SUFFIX) \ - DEF_VCOND (float, uint32_t, COND, SUFFIX##_float) \ - DEF_VCOND (double, uint64_t, COND, SUFFIX##_double) - -#define DEF_VCOND_ALL(COND, SUFFIX) \ - DEF_VCOND_SIGNED_ALL (COND, SUFFIX) \ - DEF_VCOND_UNSIGNED_ALL (COND, SUFFIX) - -#define DEF_VCOND_IMM_SIGNED_ALL(COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (int8_t, int8_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (int16_t, int16_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (int32_t, int32_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (int64_t, int64_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (float, int32_t, COND, IMM, SUFFIX##_float) \ - DEF_VCOND_IMM (double, int64_t, COND, IMM, SUFFIX##_double) - -#define DEF_VCOND_IMM_UNSIGNED_ALL(COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (uint8_t, uint8_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (uint16_t, uint16_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (uint32_t, uint32_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (uint64_t, uint64_t, COND, IMM, SUFFIX) \ - DEF_VCOND_IMM (float, uint32_t, COND, IMM, SUFFIX##_float) \ - DEF_VCOND_IMM (double, uint64_t, COND, IMM, SUFFIX##_double) - -#define DEF_VCOND_IMM_ALL(COND, IMM, SUFFIX) \ - DEF_VCOND_IMM_SIGNED_ALL (COND, IMM, SUFFIX) \ - DEF_VCOND_IMM_UNSIGNED_ALL (COND, IMM, SUFFIX) - -DEF_VCOND_ALL (>, _gt) -DEF_VCOND_ALL (<, _lt) -DEF_VCOND_ALL (>=, _ge) -DEF_VCOND_ALL (<=, _le) -DEF_VCOND_ALL (==, _eq) -DEF_VCOND_ALL (!=, _ne) - -/* == Expect immediates to make it into 
the encoding == */ - -DEF_VCOND_IMM_ALL (>, 5, _gt) -DEF_VCOND_IMM_ALL (<, 5, _lt) -DEF_VCOND_IMM_ALL (>=, 5, _ge) -DEF_VCOND_IMM_ALL (<=, 5, _le) -DEF_VCOND_IMM_ALL (==, 5, _eq) -DEF_VCOND_IMM_ALL (!=, 5, _ne) - -DEF_VCOND_IMM_SIGNED_ALL (>, 15, _gt2) -DEF_VCOND_IMM_SIGNED_ALL (<, 15, _lt2) -DEF_VCOND_IMM_SIGNED_ALL (>=, 15, _ge2) -DEF_VCOND_IMM_SIGNED_ALL (<=, 15, _le2) -DEF_VCOND_IMM_ALL (==, 15, _eq2) -DEF_VCOND_IMM_ALL (!=, 15, _ne2) - -DEF_VCOND_IMM_SIGNED_ALL (>, 16, _gt3) -DEF_VCOND_IMM_SIGNED_ALL (<, 16, _lt3) -DEF_VCOND_IMM_SIGNED_ALL (>=, 16, _ge3) -DEF_VCOND_IMM_SIGNED_ALL (<=, 16, _le3) -DEF_VCOND_IMM_ALL (==, 16, _eq3) -DEF_VCOND_IMM_ALL (!=, 16, _ne3) - -DEF_VCOND_IMM_SIGNED_ALL (>, -16, _gt4) -DEF_VCOND_IMM_SIGNED_ALL (<, -16, _lt4) -DEF_VCOND_IMM_SIGNED_ALL (>=, -16, _ge4) -DEF_VCOND_IMM_SIGNED_ALL (<=, -16, _le4) -DEF_VCOND_IMM_ALL (==, -16, _eq4) -DEF_VCOND_IMM_ALL (!=, -16, _ne4) - -DEF_VCOND_IMM_SIGNED_ALL (>, -17, _gt5) -DEF_VCOND_IMM_SIGNED_ALL (<, -17, _lt5) -DEF_VCOND_IMM_SIGNED_ALL (>=, -17, _ge5) -DEF_VCOND_IMM_SIGNED_ALL (<=, -17, _le5) -DEF_VCOND_IMM_ALL (==, -17, _eq5) -DEF_VCOND_IMM_ALL (!=, -17, _ne5) - -DEF_VCOND_IMM_UNSIGNED_ALL (>, 0, _gt6) -/* Testing if an unsigned value >= 0 or < 0 is pointless as it will get - folded away by the compiler. */ -DEF_VCOND_IMM_UNSIGNED_ALL (<=, 0, _le6) - -DEF_VCOND_IMM_UNSIGNED_ALL (>, 127, _gt7) -DEF_VCOND_IMM_UNSIGNED_ALL (<, 127, _lt7) -DEF_VCOND_IMM_UNSIGNED_ALL (>=, 127, _ge7) -DEF_VCOND_IMM_UNSIGNED_ALL (<=, 127, _le7) - -/* == Expect immediates to NOT make it into the encoding, and instead be - forced into a register. 
== */ -DEF_VCOND_IMM_UNSIGNED_ALL (>, 128, _gt8) -DEF_VCOND_IMM_UNSIGNED_ALL (<, 128, _lt8) -DEF_VCOND_IMM_UNSIGNED_ALL (>=, 128, _ge8) -DEF_VCOND_IMM_UNSIGNED_ALL (<=, 128, _le8) - -/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} 66 } } */ -/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 66 } } */ -/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 132 } } */ -/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 132 } } */ - -/* There are two signed ordered register comparisons for each of .b and .h, - one for a variable comparison and one for one of the two out-of-range - constant comparisons. The other out-of-ranger constant comparison can - be adjusted to an in-range value by inverting the handling of equality. - - The same pattern appears twice for each .s and .d, once for integer data - and once for floating-point data. */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].h, p[0-7]/z, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ - -/* Out-of-range >= is converted to in-range >. */ -/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ - -/* Out-of-range < is converted to in-range <=. 
*/ -/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ - -/* 6 for .b and .h: {signed, unsigned\n} x {variable, too high, too low\n}. */ -/* 12 for .s and .d: the above 6 repeated for integer and floating-point - data. */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ - -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ - -/* Also used for >= 16. */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ - -/* gcc converts "a < 15" into "a <= 14". 
*/ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #14\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ - -/* gcc converts "a >= 15" into "a > 14". */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #14\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ - -/* Also used for < 16. */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmple\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ - -/* Appears once for each signedness. 
*/ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ - -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ - -/* gcc converts "a > -16" into "a >= -15". 
*/ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-15\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ - -/* Also used for <= -17. */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ - -/* Also used for > -17. */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-16\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ - -/* gcc converts "a <= -16" into "a < -15". */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #-15\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ - -/* gcc converts "a > 0" into "a != 0". 
*/ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ - -/* gcc converts "a <= 0" into "a == 0". */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ - -/* Also used for >= 128. */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #127\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ - -/* gcc converts "a < 127" into "a <= 126". */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #126\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ - -/* gcc converts "a >= 127" into "a > 126". 
*/ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #126\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ - -/* Also used for < 128. */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].h, p[0-7]/z, z[0-9]+\.h, #127\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ -/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7].d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c new file mode 100644 index 00000000000..0c67f8147c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c @@ -0,0 +1,318 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ + void __attribute__ ((noinline, noclone)) \ + vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ + DATA_TYPE *__restrict__ x, \ + DATA_TYPE *__restrict__ y, \ + CMP_TYPE *__restrict__ a, \ + CMP_TYPE *__restrict__ b, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + DATA_TYPE xval = x[i], yval = y[i]; \ + CMP_TYPE aval = a[i], bval = b[i]; \ + r[i] = aval COND bval ? 
xval : yval; \ + } \ + } + +#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ + void __attribute__ ((noinline, noclone)) \ + vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ + DATA_TYPE *__restrict__ x, \ + DATA_TYPE *__restrict__ y, \ + CMP_TYPE *__restrict__ a, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + DATA_TYPE xval = x[i], yval = y[i]; \ + CMP_TYPE aval = a[i]; \ + r[i] = aval COND (CMP_TYPE) IMM ? xval : yval; \ + } \ + } + +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ + T (int8_t, int8_t, COND, SUFFIX) \ + T (int16_t, int16_t, COND, SUFFIX) \ + T (int32_t, int32_t, COND, SUFFIX) \ + T (int64_t, int64_t, COND, SUFFIX) \ + T (_Float16, int16_t, COND, SUFFIX##_float16) \ + T (float, int32_t, COND, SUFFIX##_float) \ + T (double, int64_t, COND, SUFFIX##_double) + +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ + T (uint8_t, uint8_t, COND, SUFFIX) \ + T (uint16_t, uint16_t, COND, SUFFIX) \ + T (uint32_t, uint32_t, COND, SUFFIX) \ + T (uint64_t, uint64_t, COND, SUFFIX) \ + T (_Float16, uint16_t, COND, SUFFIX##_float16) \ + T (float, uint32_t, COND, SUFFIX##_float) \ + T (double, uint64_t, COND, SUFFIX##_double) + +#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ + TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ + TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) + +#define TEST_VAR_ALL(T) \ + TEST_COND_VAR_ALL (T, >, _gt) \ + TEST_COND_VAR_ALL (T, <, _lt) \ + TEST_COND_VAR_ALL (T, >=, _ge) \ + TEST_COND_VAR_ALL (T, <=, _le) \ + TEST_COND_VAR_ALL (T, ==, _eq) \ + TEST_COND_VAR_ALL (T, !=, _ne) + +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (int8_t, int8_t, COND, IMM, SUFFIX) \ + T (int16_t, int16_t, COND, IMM, SUFFIX) \ + T (int32_t, int32_t, COND, IMM, SUFFIX) \ + T (int64_t, int64_t, COND, IMM, SUFFIX) \ + T (_Float16, int16_t, COND, IMM, SUFFIX##_float16) \ + T (float, int32_t, COND, IMM, SUFFIX##_float) \ + T (double, int64_t, COND, IMM, SUFFIX##_double) + +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, 
IMM, SUFFIX) \ + T (uint8_t, uint8_t, COND, IMM, SUFFIX) \ + T (uint16_t, uint16_t, COND, IMM, SUFFIX) \ + T (uint32_t, uint32_t, COND, IMM, SUFFIX) \ + T (uint64_t, uint64_t, COND, IMM, SUFFIX) \ + T (_Float16, uint16_t, COND, IMM, SUFFIX##_float16) \ + T (float, uint32_t, COND, IMM, SUFFIX##_float) \ + T (double, uint64_t, COND, IMM, SUFFIX##_double) + +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX) + +#define TEST_IMM_ALL(T) \ + /* Expect immediates to make it into the encoding. */ \ + TEST_COND_IMM_ALL (T, >, 5, _gt) \ + TEST_COND_IMM_ALL (T, <, 5, _lt) \ + TEST_COND_IMM_ALL (T, >=, 5, _ge) \ + TEST_COND_IMM_ALL (T, <=, 5, _le) \ + TEST_COND_IMM_ALL (T, ==, 5, _eq) \ + TEST_COND_IMM_ALL (T, !=, 5, _ne) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2) \ + TEST_COND_IMM_ALL (T, ==, 15, _eq2) \ + TEST_COND_IMM_ALL (T, !=, 15, _ne2) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3) \ + TEST_COND_IMM_ALL (T, ==, 16, _eq3) \ + TEST_COND_IMM_ALL (T, !=, 16, _ne3) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4) \ + TEST_COND_IMM_ALL (T, ==, -16, _eq4) \ + TEST_COND_IMM_ALL (T, !=, -16, _ne4) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5) \ + TEST_COND_IMM_ALL (T, ==, -17, _eq5) \ + TEST_COND_IMM_ALL (T, !=, -17, _ne5) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6) \ + /* Testing if an unsigned 
value >= 0 or < 0 is pointless as it will \ + get folded away by the compiler. */ \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7) \ + \ + /* Expect immediates to NOT make it into the encoding, and instead be \ + forced into a register. */ \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8) + +TEST_VAR_ALL (DEF_VCOND_VAR) +TEST_IMM_ALL (DEF_VCOND_IMM) + +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} 66 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 132 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 132 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 132 } } */ + +/* There are two signed ordered register comparisons for .b, one for a + variable comparison and one for one of the two out-of-range constant + comparisons. The other out-of-range constant comparison can be + adjusted to an in-range value by inverting the handling of equality. + + The same pattern appears twice for .h, .s and .d, once for integer data + and once for floating-point data. 
*/ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { 
dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* Out-of-range >= is converted to in-range >. */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* Out-of-range < is converted to in-range <=. */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* 6 for .b: {signed, unsigned} x {variable, too high, too low}. */ +/* 12 for .h, .s and .d: the above 6 repeated for integer and floating-point + data. 
*/ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ + +/* Also used for >= 16. */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* gcc converts "a < 15" into "a <= 14". */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ + +/* gcc converts "a >= 15" into "a > 14". 
*/ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ + +/* Also used for < 16. */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* Appears once for each signedness. */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, 
z[0-9]+\.d, #-16\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* gcc converts "a > -16" into "a >= -15". */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ + +/* Also used for <= -17. */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* Also used for > -17. */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* gcc converts "a <= -16" into "a < -15". 
*/ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ + +/* gcc converts "a > 0" into "a != 0". */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ + +/* gcc converts "a <= 0" into "a == 0". */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ + +/* Also used for >= 128. */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ + +/* gcc converts "a < 127" into "a <= 126". 
*/ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ + +/* gcc converts "a >= 127" into "a > 126". */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ + +/* Also used for < 128. */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.C deleted file mode 100644 index b3c54b74fde..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.C +++ /dev/null @@ -1,118 +0,0 @@ -/* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -march=armv8-a+sve -fno-inline" } */ - -#include "sve_vcond_2.C" - -#include <stdlib.h> - -#define TEST_VCOND(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ -{ \ - const int n = 32 / sizeof (DATA_TYPE); \ - CMP_TYPE x[n], y[n]; \ - DATA_TYPE a[n], b[n]; \ - for (int i = 0; i < n; ++i) \ - { \ - x[i] = i; \ - y[i] = (i & 1) + 5; \ - a[i] = 6 * i; \ - b[i] = 4 + i; \ - } \ - 
vcond_##CMP_TYPE##SUFFIX (r_##DATA_TYPE, a, b, x, y, n); \ - for (int i = 0; i < n; ++i) \ - if (r_##DATA_TYPE[i] != (x[i] COND y[i] ? a[i] : b[i])) \ - abort (); \ -} - -#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ -{ \ - const int n = 32 / sizeof (DATA_TYPE); \ - CMP_TYPE x[n]; \ - DATA_TYPE a[n], b[n]; \ - for (int i = 0; i < n; ++i) \ - { \ - x[i] = i - 1; \ - a[i] = 5 * i + IMM; \ - b[i] = 7 + i - IMM * 2; \ - } \ - vcond_imm_##CMP_TYPE##SUFFIX (r_##DATA_TYPE, a, b, x, n); \ - for (int i = 0; i < n; ++i) \ - if (r_##DATA_TYPE[i] != (x[i] COND IMM ? a[i] : b[i])) \ - abort (); \ -} - -#define TEST_VCOND_SIGNED_ALL(COND, SUFFIX) \ - TEST_VCOND (int8_t, int8_t, COND, SUFFIX) \ - TEST_VCOND (int16_t, int16_t, COND, SUFFIX) \ - TEST_VCOND (int32_t, int32_t, COND, SUFFIX) \ - TEST_VCOND (int64_t, int64_t, COND, SUFFIX) \ - TEST_VCOND (float, int32_t, COND, SUFFIX##_float) \ - TEST_VCOND (double, int64_t, COND, SUFFIX##_double) - -#define TEST_VCOND_UNSIGNED_ALL(COND, SUFFIX) \ - TEST_VCOND (uint8_t, uint8_t, COND, SUFFIX) \ - TEST_VCOND (uint16_t, uint16_t, COND, SUFFIX) \ - TEST_VCOND (uint32_t, uint32_t, COND, SUFFIX) \ - TEST_VCOND (uint64_t, uint64_t, COND, SUFFIX) \ - TEST_VCOND (float, uint32_t, COND, SUFFIX##_float) \ - TEST_VCOND (double, uint64_t, COND, SUFFIX##_double) - -#define TEST_VCOND_ALL(COND, SUFFIX) \ - TEST_VCOND_SIGNED_ALL (COND, SUFFIX) \ - TEST_VCOND_UNSIGNED_ALL (COND, SUFFIX) - -#define TEST_VCOND_IMM_SIGNED_ALL(COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (int8_t, int8_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (int16_t, int16_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (int32_t, int32_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (int64_t, int64_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (float, int32_t, COND, IMM, SUFFIX##_float) \ - TEST_VCOND_IMM (double, int64_t, COND, IMM, SUFFIX##_double) - -#define TEST_VCOND_IMM_UNSIGNED_ALL(COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (uint8_t, uint8_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (uint16_t, 
uint16_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (uint32_t, uint32_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (uint64_t, uint64_t, COND, IMM, SUFFIX) \ - TEST_VCOND_IMM (float, uint32_t, COND, IMM, SUFFIX##_float) \ - TEST_VCOND_IMM (double, uint64_t, COND, IMM, SUFFIX##_double) - -#define TEST_VCOND_IMM_ALL(COND, IMM, SUFFIX) \ - TEST_VCOND_IMM_SIGNED_ALL (COND, IMM, SUFFIX) \ - TEST_VCOND_IMM_UNSIGNED_ALL (COND, IMM, SUFFIX) - -#define DEF_INIT_VECTOR(TYPE) \ - TYPE r_##TYPE[NUM_ELEMS(TYPE)]; \ - for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - r_##TYPE[i] = i * 3; - -int __attribute__ ((optimize (1))) -main (int argc, char **argv) -{ - int result = 0; - DEF_INIT_VECTOR (int8_t) - DEF_INIT_VECTOR (int16_t) - DEF_INIT_VECTOR (int32_t) - DEF_INIT_VECTOR (int64_t) - DEF_INIT_VECTOR (uint8_t) - DEF_INIT_VECTOR (uint16_t) - DEF_INIT_VECTOR (uint32_t) - DEF_INIT_VECTOR (uint64_t) - DEF_INIT_VECTOR (float) - DEF_INIT_VECTOR (double) - - TEST_VCOND_ALL (>, _gt) - TEST_VCOND_ALL (<, _lt) - TEST_VCOND_ALL (>=, _ge) - TEST_VCOND_ALL (<=, _le) - TEST_VCOND_ALL (==, _eq) - TEST_VCOND_ALL (!=, _ne) - - TEST_VCOND_IMM_ALL (>, 5, _gt) - TEST_VCOND_IMM_ALL (<, 5, _lt) - TEST_VCOND_IMM_ALL (>=, 5, _ge) - TEST_VCOND_IMM_ALL (<=, 5, _le) - TEST_VCOND_IMM_ALL (==, 5, _eq) - TEST_VCOND_IMM_ALL (!=, 5, _ne) - - return 0; -} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c new file mode 100644 index 00000000000..4cdb5bb9e43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c @@ -0,0 +1,49 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vcond_2.c" + +#define N 97 + +#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ +{ \ + DATA_TYPE x[N], y[N], r[N]; \ + CMP_TYPE a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + x[i] = i; \ + y[i] = (i & 1) + 5; \ + a[i] = i - N / 3; \ + b[i] = N - N / 3 - i; \ + asm volatile ("" 
::: "memory"); \ + } \ + vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (a[i] COND b[i] ? x[i] : y[i])) \ + __builtin_abort (); \ +} + +#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ +{ \ + DATA_TYPE x[N], y[N], r[N]; \ + CMP_TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + x[i] = i; \ + y[i] = (i & 1) + 5; \ + a[i] = IMM - N / 3 + i; \ + asm volatile ("" ::: "memory"); \ + } \ + vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i])) \ + __builtin_abort (); \ +} + +int __attribute__ ((optimize (1))) +main (int argc, char **argv) +{ + TEST_VAR_ALL (TEST_VCOND_VAR) + TEST_IMM_ALL (TEST_VCOND_IMM) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.c index 68c033b1a7d..9750bd07fda 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.c @@ -5,20 +5,18 @@ #define DEF_SEL_IMM(TYPE, SUFFIX, IMM) \ void \ -sel_##TYPE##_##SUFFIX (TYPE *__restrict__ a, TYPE *__restrict__ b, \ - int n) \ +sel_##TYPE##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, int n) \ { \ for (int i = 0; i < n; i++) \ a[i] = b[i] != 0 ? IMM : 0; \ } -#define DEF_SEL_VAR(TYPE) \ -void \ -sel_##TYPE##_var (TYPE *__restrict__ a, TYPE *__restrict__ b, \ - TYPE val, int n) \ -{ \ - for (int i = 0; i < n; i++) \ - a[i] = b[i] != 0 ? val : 0; \ +#define DEF_SEL_VAR(TYPE) \ +void \ +sel_##TYPE##_var (TYPE *restrict a, TYPE *restrict b, TYPE val, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + a[i] = b[i] != 0 ? 
val : 0; \ } #define TEST_TYPE8(TYPE) \ @@ -54,17 +52,17 @@ TEST_TYPE16 (int16_t) TEST_TYPE32 (int32_t) TEST_TYPE32 (int64_t) -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.b, p[0-7]/z, #-128\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.b, p[0-7]/z, #-127\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.b, p[0-7]/z, #2\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.b, p[0-7]/z, #127\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-128\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-127\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #127\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #-32768\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #-32512\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #-256\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #-128\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #-127\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #2\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #127\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #256\n} 3 } } */ -/* { dg-final { scan-assembler-times {\tmov\tz[0-9]*\.[hsd], p[0-7]/z, #32512\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32768\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32512\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-256\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-128\n} 3 } } */ +/* { dg-final { 
scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-127\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #2\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #127\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #256\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #32512\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c index 36c43e9f1e8..e8d06bb9f17 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c @@ -8,8 +8,6 @@ #include <fenv.h> -extern void abort (void) __attribute__ ((noreturn)); - #include "sve_vcond_4.c" #define N 401 @@ -33,6 +31,7 @@ extern void abort (void) __attribute__ ((noreturn)); b[i] = i * 0.1; \ else \ b[i] = i; \ + asm volatile ("" ::: "memory"); \ } \ feclearexcept (FE_ALL_EXCEPT); \ test_##TYPE1##_##TYPE2##_##CMP##_var (dest1, src, 11, a, b, N); \ @@ -40,15 +39,15 @@ extern void abort (void) __attribute__ ((noreturn)); test_##TYPE1##_##TYPE2##_##CMP##_sel (dest3, 33, 44, a, 9, N); \ if (TEST_EXCEPTIONS \ && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \ - abort (); \ + __builtin_abort (); \ for (int i = 0; i < N; ++i) \ { \ if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \ - abort (); \ + __builtin_abort (); \ if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \ - abort (); \ + __builtin_abort (); \ if (dest3[i] != (CMP (a[i], 9) ? 
33 : 44)) \ - abort (); \ + __builtin_abort (); \ } \ } @@ -64,7 +63,7 @@ extern void abort (void) __attribute__ ((noreturn)); RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \ RUN_LOOP (double, double, CMP, EXPECT_INVALID) -int __attribute__ ((optimize (1, "no-tree-vectorize"))) +int __attribute__ ((optimize (1))) main (void) { RUN_CMP (eq, 0) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c index 8ab040ef51e..74336050d8d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c @@ -25,6 +25,7 @@ } #define TEST_BINOP(T, BINOP) \ + T (_Float16, BINOP) \ T (float, BINOP) \ T (double, BINOP) @@ -40,11 +41,11 @@ TEST_ALL (LOOP) /* Currently we don't manage to remove ANDs from the other loops. */ -/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 { xfail *-*-* } } } */ /* { dg-final { scan-assembler {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} } } */ -/* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ -/* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { 
scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c index ff8ad90da9f..edad9b8272d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c @@ -1,8 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -extern void abort (void) __attribute__ ((noreturn)); - #include "sve_vcond_6.c" #define N 401 @@ -17,6 +15,7 @@ extern void abort (void) __attribute__ ((noreturn)); b[i] = i % 7 < 4 ? __builtin_nan("") : i; \ c[i] = i % 9 < 5 ? __builtin_nan("") : i; \ d[i] = i % 11 < 6 ? __builtin_nan("") : i; \ + asm volatile ("" ::: "memory"); \ } \ test_##TYPE##_##BINOP (dest, src, a, b, c, d, 100, N); \ for (int i = 0; i < N; ++i) \ @@ -24,11 +23,11 @@ extern void abort (void) __attribute__ ((noreturn)); int res = BINOP (__builtin_isunordered (a[i], b[i]), \ __builtin_isunordered (c[i], d[i])); \ if (dest[i] != (res ? 
src[i] : 100.0)) \ - abort (); \ + __builtin_abort (); \ } \ } -int __attribute__ ((optimize (1, "no-tree-vectorize"))) +int __attribute__ ((optimize (1))) main (void) { TEST_ALL (RUN_LOOP) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.c index d6194dcbf8f..95f19f7f786 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.c @@ -1,25 +1,30 @@ /* { dg-do compile } */ -/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ #include <stdint.h> #define NUM_ELEMS(TYPE) (128 / sizeof (TYPE)) #define DUP_FN(TYPE) \ -void dup_##TYPE (TYPE *r, TYPE v) \ +void __attribute__ ((noinline, noclone)) \ +dup_##TYPE (TYPE *r, TYPE v) \ { \ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ r[i] = v; \ } +DUP_FN (int8_t) DUP_FN (int16_t) DUP_FN (int32_t) DUP_FN (int64_t) +DUP_FN (_Float16) DUP_FN (float) DUP_FN (double) +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, w[0-9]+\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, w[0-9]+\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, h[0-9]+\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, s[0-9]+\n} 1 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, d[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.c index 579327ef81d..ba7eb44be70 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.C +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.c @@ -1,23 +1,26 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve" } */ +/* { 
dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ -#include "sve_vec_init_1.C" - -#include <stdlib.h> +#include "sve_vec_init_1.c" #define TEST_INIT_VECTOR(TYPE, VAL) \ - TYPE r_##TYPE[NUM_ELEMS (TYPE)]; \ - dup_##TYPE (r_##TYPE, VAL); \ + { \ + TYPE r[NUM_ELEMS (TYPE)]; \ + dup_##TYPE (r, VAL); \ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ - if (r_##TYPE[i] != VAL) \ - abort (); + if (r[i] != VAL) \ + __builtin_abort (); \ + } -int main (void) +int __attribute__ ((optimize (1))) +main (void) { + TEST_INIT_VECTOR (int8_t, 0x2a); TEST_INIT_VECTOR (int16_t, 0x3976); TEST_INIT_VECTOR (int32_t, 0x31232976); TEST_INIT_VECTOR (int64_t, 0x9489363731232976LL); + TEST_INIT_VECTOR (_Float16, -0x1.fp10); TEST_INIT_VECTOR (float, -0x1.fe02p10); TEST_INIT_VECTOR (double, 0x1.fe02eeeee1p10); diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c index 214c0c3930f..ae8542f2c75 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c @@ -1,15 +1,19 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define VEC_PERM(TYPE, MASKTYPE) \ -TYPE vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_perm_##TYPE (TYPE values1, TYPE 
values2, MASKTYPE mask) \ { \ return __builtin_shuffle (values1, values2, mask); \ } @@ -20,8 +24,9 @@ VEC_PERM (v16hi, v16hi); VEC_PERM (v32qi, v32qi); VEC_PERM (v4df, v4di); VEC_PERM (v8sf, v8si); +VEC_PERM (v16hf, v16hi); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c index 630e30867cb..6ab82250d4c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c @@ -1,8 +1,8 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_vec_perm_1.c" -extern void abort (void); #define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, \ VALUES1, VALUES2, MASK) \ @@ -14,7 +14,7 @@ extern void abort (void); TYPE dest; \ dest = vec_perm_##TYPE (values1, values2, mask); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -92,5 +92,20 @@ int main (void) 15 + (16 * 4), 7 + (16 * 4), 6 + (16 * 3), 5 + (16 * 2), 4 + (16 * 1), 10 + (16 * 0) })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 
38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((v16hi) { 9 + (32 * 2), 13 + (32 * 2), + 15 + (32 * 8), 7 + (32 * 9), + 25 + (32 * 4), 26 + (32 * 3), + 27 + (32 * 1), 17 + (32 * 2), + 4 + (32 * 6), 31 + (32 * 7), + 0 + (32 * 8), 18 + (32 * 9), + 6 + (32 * 6), 5 + (32 * 7), + 4 + (32 * 2), 10 + (32 * 2) })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c index ce8cc79728a..4d46ff02192 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c @@ -3,7 +3,6 @@ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_vec_perm_1.c" -extern void abort (void); #define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, \ VALUES1, VALUES2, MASK) \ @@ -15,7 +14,7 @@ extern void abort (void); TYPE dest; \ dest = vec_perm_##TYPE (values1, values2, mask); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -67,5 +66,14 @@ int main (void) ((v8sf) { 33.2, 34.2, 35.2, 36.2, 37.2, 38.2, 39.2, 40.2 }), ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c index d26d0902165..e76b3bc5abb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c @@ -1,15 
+1,19 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define VEC_PERM_CONST(TYPE, MASK) \ -TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_perm_##TYPE (TYPE values1, TYPE values2) \ { \ return __builtin_shuffle (values1, values2, MASK); \ } @@ -24,8 +28,10 @@ VEC_PERM_CONST (v32qi, ((v32qi) { 13, 31, 11, 2, 48, 28, 3, 4, 2, 57, 22, 11, 6, 16, 18, 21 })); VEC_PERM_CONST (v4df, ((v4di) { 7, 3, 2, 1 })); VEC_PERM_CONST (v8sf, ((v8si) { 1, 9, 13, 11, 2, 5, 4, 2 })); +VEC_PERM_CONST (v16hf, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0, + 22, 1, 8, 9, 3, 24, 15, 1 })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c index 8507cb46fb9..b4f82091f7c 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define VEC_PERM_CONST_OVERRUN(TYPE, MASK) \ TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \ @@ -50,8 +53,16 @@ VEC_PERM_CONST_OVERRUN (v8sf, ((v8si) { 1 + (16 * 1), 9 + (16 * 2), 13 + (16 * 2), 11 + (16 * 3), 2 + (16 * 2), 5 + (16 * 2), 4 + (16 * 4), 2 + (16 * 3) })); +VEC_PERM_CONST_OVERRUN (v16hf, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1), + 5 + (32 * 3), 4 + (32 * 3), + 21 + (32 * 1), 12 + (32 * 3), + 13 + (32 * 3), 0 + (32 * 1), + 22 + (32 * 2), 1 + (32 * 2), + 8 + (32 * 2), 9 + (32 * 1), + 3 + (32 * 2), 24 + (32 * 2), + 15 + (32 * 1), 1 + (32 * 1) })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c index 7edda5398e2..7324c1da0a4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c @@ -4,7 +4,6 @@ #include "sve_vec_perm_const_1.c" #include "sve_vec_perm_const_1_overrun.c" -extern void abort (void); #define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ { \ @@ -14,11 +13,11 @@ extern void abort (void); TYPE dest; \ dest = vec_perm_##TYPE (values1, values2); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ TYPE dest2; \ dest2 = vec_perm_overrun_##TYPE (values1, values2); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -60,5 +59,12 @@ int main (void) ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), ((v8sf) { 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (v16hf, + ((v16hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0, + 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c index c1e12faa850..a4efb4fea79 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include 
<stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define VEC_PERM_SINGLE(TYPE, MASK) \ TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \ @@ -24,8 +27,10 @@ VEC_PERM_SINGLE (v32qi, ((v32qi) { 13, 21, 11, 2, 8, 28, 3, 4, 2, 7, 22, 11, 6, 16, 18, 21 })); VEC_PERM_SINGLE (v4df, ((v4di) { 3, 3, 1, 1 })); VEC_PERM_SINGLE (v8sf, ((v8si) { 4, 5, 6, 0, 2, 7, 4, 2 })); +VEC_PERM_SINGLE (v16hf, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0, + 1, 1, 8, 9, 3, 14, 15, 1 })); /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c index 2aa08f59590..fbae30c8d1c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c @@ -3,7 +3,6 @@ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ #include "sve_vec_perm_const_single_1.c" -extern void abort (void); #define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ { \ @@ -13,7 +12,7 @@ extern void abort (void); TYPE dest; \ dest = vec_perm_##TYPE (values1, values2); \ if (__builtin_memcmp (&dest, 
&expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -55,5 +54,12 @@ int main (void) ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), ((v8sf) { 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (v16hf, + ((v16hf) { 11.0, 10.0, 8.0, 7.0, 14.0, 15.0, 16.0, 3.0, + 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c index 54c3a3068b0..a82b57dc378 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c @@ -1,12 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define VEC_PERM(TYPE, MASKTYPE) \ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \ @@ -14,14 +17,15 @@ TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \ return __builtin_shuffle (values, mask); \ } -VEC_PERM (v4di, v4di); \ -VEC_PERM (v8si, v8si); \ -VEC_PERM (v16hi, v16hi); \ -VEC_PERM (v32qi, v32qi); \ -VEC_PERM (v4df, v4di); \ 
-VEC_PERM (v8sf, v8si); +VEC_PERM (v4di, v4di) +VEC_PERM (v8si, v8si) +VEC_PERM (v16hi, v16hi) +VEC_PERM (v32qi, v32qi) +VEC_PERM (v4df, v4di) +VEC_PERM (v8sf, v8si) +VEC_PERM (v16hf, v16hi) /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ /* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c index 6caa1f95cfd..539c99d4f61 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c @@ -5,7 +5,7 @@ #include "sve_vec_perm_single_1.c" extern void abort (void); -#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, VALUES, MASK) \ +#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, VALUES, MASK) \ { \ TYPE expected_result = EXPECTED_RESULT; \ TYPE values = VALUES; \ @@ -13,7 +13,7 @@ extern void abort (void); TYPE dest; \ dest = vec_perm_##TYPE (values, mask); \ if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ - abort (); \ + __builtin_abort (); \ } int main (void) @@ -54,5 +54,12 @@ int main (void) ((v8sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }), ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 12.0, 13.0, 14.0, 4.0, + 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); return 0; } 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c index 509dddcb100..918313f62bd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c @@ -5,12 +5,15 @@ #define BIAS 0 #endif -typedef long v4di __attribute__((vector_size (32))); -typedef int v8si __attribute__((vector_size (32))); -typedef short v16hi __attribute__((vector_size (32))); -typedef char v32qi __attribute__((vector_size (32))); +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); typedef double v4df __attribute__((vector_size (32))); typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); #define MASK_2(X, Y) X, Y + X #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y) @@ -38,7 +41,8 @@ typedef float v8sf __attribute__((vector_size (32))); T (v16hi, 16) \ T (v32qi, 32) \ T (v4df, 4) \ - T (v8sf, 8) + T (v8sf, 8) \ + T (v16hf, 16) TEST_ALL (PERMUTE) @@ -46,5 +50,5 @@ TEST_ALL (PERMUTE) /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ -/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c index 360ffab7d3e..40a899bc40a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c @@ -8,5 +8,5 @@ /* { dg-final { scan-assembler-times 
{\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ /* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ -/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ /* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_11.c b/gcc/testsuite/gcc.target/aarch64/target_attr_11.c index 7cfb826fc44..a3df438206b 100644 --- a/gcc/testsuite/gcc.target/aarch64/target_attr_11.c +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_11.c @@ -10,4 +10,4 @@ foo (int a) } /* { dg-error "does not allow a negated form" "" { target *-*-* } 0 } */ -/* { dg-error "is invalid" "" { target *-*-* } 0 } */ +/* { dg-error "is not valid" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_12.c b/gcc/testsuite/gcc.target/aarch64/target_attr_12.c index 39cb9964003..8a3a25bfed7 100644 --- a/gcc/testsuite/gcc.target/aarch64/target_attr_12.c +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_12.c @@ -10,4 +10,4 @@ foo (int a) } /* { dg-error "does not accept an argument" "" { target *-*-* } 0 } */ -/* { dg-error "is invalid" "" { target *-*-* } 0 } */ +/* { dg-error "is not valid" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_17.c b/gcc/testsuite/gcc.target/aarch64/target_attr_17.c index 483cc6d4a1d..2a7a7511bea 100644 --- a/gcc/testsuite/gcc.target/aarch64/target_attr_17.c +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_17.c @@ -5,4 +5,4 @@ foo (int a) return a + 5; } -/* { dg-error "target attribute.*is invalid" "" { target *-*-* } 0 } */
\ No newline at end of file +/* { dg-error "attribute 'target\\(\"invalid-attr-string\"\\)' is not valid" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c b/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c index a1422d7090b..436399c6195 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c @@ -56,13 +56,13 @@ TEST (SUFFIX, q, 32, 4, u,u,s) \ TEST (SUFFIX, q, 64, 2, u,u,d) \ BUILD_VARIANTS ( ) -/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */ -/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzs\\t(w|s)\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzs\\t(x|d)\[0-9\]+, d\[0-9\]+" } } */ /* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ /* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ /* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ -/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */ -/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzu\\t(w|s)\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzu\\t(x|d)\[0-9\]+, d\[0-9\]+" } } */ /* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ /* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ /* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ |