diff options
Diffstat (limited to 'gcc/testsuite')
731 files changed, 25352 insertions, 755 deletions
diff --git a/gcc/testsuite/g++.dg/vect/pr36648.cc b/gcc/testsuite/g++.dg/vect/pr36648.cc index 7bda82899d0..8d24d3d445d 100644 --- a/gcc/testsuite/g++.dg/vect/pr36648.cc +++ b/gcc/testsuite/g++.dg/vect/pr36648.cc @@ -25,6 +25,6 @@ int main() { } targets, ! vect_no_align is a sufficient test. */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/g++.target/aarch64/aarch64.exp b/gcc/testsuite/g++.target/aarch64/aarch64.exp new file mode 100644 index 00000000000..5eaa8725c9d --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/aarch64.exp @@ -0,0 +1,38 @@ +# Specific regression driver for AArch64. +# Copyright (C) 2009-2017 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +# GCC testsuite that uses the `dg.exp' driver. 
+ +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } then { + return +} + +# Load support procs. +load_lib g++-dg.exp + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] "" "" + +# All done. +dg-finish diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_1.C b/gcc/testsuite/g++.target/aarch64/sve_catch_1.C new file mode 100644 index 00000000000..48b007fc1be --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_1.C @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +/* Invoke X (P##n) for n in [0, 7]. */ +#define REPEAT8(X, P) \ + X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7) + +/* Invoke X (n) for all octal n in [0, 39]. */ +#define REPEAT40(X) \ + REPEAT8 (X, 0) REPEAT8 (X, 1) REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4) + +volatile int testi; + +/* Throw to f3. */ +void __attribute__ ((weak)) +f1 (int x[40][100], int *y) +{ + /* A wild write to x and y. */ + asm volatile ("" ::: "memory"); + if (y[testi] == x[testi][testi]) + throw 100; +} + +/* Expect vector work to be done, with spilling of vector registers. */ +void __attribute__ ((weak)) +f2 (int x[40][100], int *y) +{ + /* Try to force some spilling. */ +#define DECLARE(N) int y##N = y[N]; + REPEAT40 (DECLARE); + for (int j = 0; j < 20; ++j) + { + f1 (x, y); +#pragma omp simd + for (int i = 0; i < 100; ++i) + { +#define INC(N) x[N][i] += y##N; + REPEAT40 (INC); + } + } +} + +/* Catch an exception thrown from f1, via f2. 
*/ +void __attribute__ ((weak)) +f3 (int x[40][100], int *y, int *z) +{ + volatile int extra = 111; + try + { + f2 (x, y); + } + catch (int val) + { + *z = val + extra; + } +} + +static int x[40][100]; +static int y[40]; +static int z; + +int +main (void) +{ + f3 (x, y, &z); + if (z != 211) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_2.C b/gcc/testsuite/g++.target/aarch64/sve_catch_2.C new file mode 100644 index 00000000000..4acdefd235a --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_2.C @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +#include "sve_catch_1.C" diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_3.C b/gcc/testsuite/g++.target/aarch64/sve_catch_3.C new file mode 100644 index 00000000000..b7e701668e5 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_3.C @@ -0,0 +1,79 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +/* Invoke X (P##n) for n in [0, 7]. */ +#define REPEAT8(X, P) \ + X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7) + +/* Invoke X (n) for all octal n in [0, 39]. */ +#define REPEAT40(X) \ + REPEAT8 (X, 0) REPEAT8 (X, 1) REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4) + +volatile int testi, sink; + +/* Take 2 stack arguments and throw to f3. */ +void __attribute__ ((weak)) +f1 (int x[40][100], int *y, int z1, int z2, int z3, int z4, + int z5, int z6, int z7, int z8) +{ + /* A wild write to x and y. 
*/ + sink = z1; + sink = z2; + sink = z3; + sink = z4; + sink = z5; + sink = z6; + sink = z7; + sink = z8; + asm volatile ("" ::: "memory"); + if (y[testi] == x[testi][testi]) + throw 100; +} + +/* Expect vector work to be done, with spilling of vector registers. */ +void __attribute__ ((weak)) +f2 (int x[40][100], int *y) +{ + /* Try to force some spilling. */ +#define DECLARE(N) int y##N = y[N]; + REPEAT40 (DECLARE); + for (int j = 0; j < 20; ++j) + { + f1 (x, y, 1, 2, 3, 4, 5, 6, 7, 8); +#pragma omp simd + for (int i = 0; i < 100; ++i) + { +#define INC(N) x[N][i] += y##N; + REPEAT40 (INC); + } + } +} + +/* Catch an exception thrown from f1, via f2. */ +void __attribute__ ((weak)) +f3 (int x[40][100], int *y, int *z) +{ + volatile int extra = 111; + try + { + f2 (x, y); + } + catch (int val) + { + *z = val + extra; + } +} + +static int x[40][100]; +static int y[40]; +static int z; + +int +main (void) +{ + f3 (x, y, &z); + if (z != 211) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_4.C b/gcc/testsuite/g++.target/aarch64/sve_catch_4.C new file mode 100644 index 00000000000..cb75672e6b6 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_4.C @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +#include "sve_catch_3.C" diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_5.C b/gcc/testsuite/g++.target/aarch64/sve_catch_5.C new file mode 100644 index 00000000000..7d0d430fd91 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_5.C @@ -0,0 +1,82 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fno-omit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +/* Invoke X (P##n) for n in [0, 7]. 
*/ +#define REPEAT8(X, P) \ + X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7) + +/* Invoke X (n) for all octal n in [0, 39]. */ +#define REPEAT40(X) \ + REPEAT8 (X, 0) REPEAT8 (X, 1) REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4) + +volatile int testi, sink; +volatile void *ptr; + +/* Take 2 stack arguments and throw to f3. */ +void __attribute__ ((weak)) +f1 (int x[40][100], int *y, int z1, int z2, int z3, int z4, + int z5, int z6, int z7, int z8) +{ + /* A wild write to x and y. */ + sink = z1; + sink = z2; + sink = z3; + sink = z4; + sink = z5; + sink = z6; + sink = z7; + sink = z8; + asm volatile ("" ::: "memory"); + if (y[testi] == x[testi][testi]) + throw 100; +} + +/* Expect vector work to be done, with spilling of vector registers. */ +void __attribute__ ((weak)) +f2 (int x[40][100], int *y) +{ + /* Create a true variable-sized frame. */ + ptr = __builtin_alloca (testi + 40); + /* Try to force some spilling. */ +#define DECLARE(N) int y##N = y[N]; + REPEAT40 (DECLARE); + for (int j = 0; j < 20; ++j) + { + f1 (x, y, 1, 2, 3, 4, 5, 6, 7, 8); +#pragma omp simd + for (int i = 0; i < 100; ++i) + { +#define INC(N) x[N][i] += y##N; + REPEAT40 (INC); + } + } +} + +/* Catch an exception thrown from f1, via f2. 
*/ +void __attribute__ ((weak)) +f3 (int x[40][100], int *y, int *z) +{ + volatile int extra = 111; + try + { + f2 (x, y); + } + catch (int val) + { + *z = val + extra; + } +} + +static int x[40][100]; +static int y[40]; +static int z; + +int +main (void) +{ + f3 (x, y, &z); + if (z != 211) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/g++.target/aarch64/sve_catch_6.C b/gcc/testsuite/g++.target/aarch64/sve_catch_6.C new file mode 100644 index 00000000000..184d7ee111e --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve_catch_6.C @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer" } */ +/* { dg-options "-O3 -fopenmp-simd -fomit-frame-pointer -march=armv8-a+sve" { target aarch64_sve_hw } } */ + +#include "sve_catch_5.C" diff --git a/gcc/testsuite/gcc.c-torture/compile/pr82816.c b/gcc/testsuite/gcc.c-torture/compile/pr82816.c new file mode 100644 index 00000000000..8e9bd001bac --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr82816.c @@ -0,0 +1,12 @@ +struct A +{ + int b:3; +} d, e; + +int c; + +void f () +{ + char g = d.b * e.b; + c = g; +} diff --git a/gcc/testsuite/gcc.dg/fma-1.c b/gcc/testsuite/gcc.dg/fma-1.c new file mode 100644 index 00000000000..f9865775ac4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-1.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return a * b + c; +} + +double +f2 (double a, double b, double c) +{ + return a * b + c; +} + +/* { dg-final { scan-tree-dump-times { = FMA \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-2.c b/gcc/testsuite/gcc.dg/fma-2.c new file mode 100644 index 00000000000..79e873ad9c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-2.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return a * b - c; +} + +double +f2 (double a, double b, double c) +{ + return a * b - c; +} + +/* 
{ dg-final { scan-tree-dump-times { = FMS \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c new file mode 100644 index 00000000000..931cdd49a26 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-3.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return c - a * b; +} + +double +f2 (double a, double b, double c) +{ + return c - a * b; +} + +/* { dg-final { scan-tree-dump-times { = FNMA \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c new file mode 100644 index 00000000000..d80d04acc99 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-4.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return -(a * b) - c; +} + +double +f2 (double a, double b, double c) +{ + return -(a * b) - c; +} + +/* { dg-final { scan-tree-dump-times { = FNMS \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-5.c b/gcc/testsuite/gcc.dg/fma-5.c new file mode 100644 index 00000000000..b8f6deeeb49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-5.c @@ -0,0 +1,53 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return __builtin_fmaf (a, b, -c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (a, b, -c); +} + +void +f3 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (a, b, -e); + res[1] = __builtin_fmaf (c, d, -e); +} + +void +f4 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (a, b, -e); + res[1] = __builtin_fma (c, d, -e); +} + +float +f5 (float a, float b, float c) +{ + return -__builtin_fmaf (-a, b, c); +} + +double +f6 (double a, double b, double c) +{ + return -__builtin_fma (-a, b, c); +} + +float +f7 (float a, float b, float 
c) +{ + return -__builtin_fmaf (a, -b, c); +} + +double +f8 (double a, double b, double c) +{ + return -__builtin_fma (a, -b, c); +} + +/* { dg-final { scan-tree-dump-times { = FMS \(} 10 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c new file mode 100644 index 00000000000..06845725783 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-6.c @@ -0,0 +1,67 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return __builtin_fmaf (-a, b, c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (-a, b, c); +} + +float +f3 (float a, float b, float c) +{ + return __builtin_fmaf (a, -b, c); +} + +double +f4 (double a, double b, double c) +{ + return __builtin_fma (a, -b, c); +} + +void +f5 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (-a, b, c); + res[1] = __builtin_fmaf (-a, d, e); +} + +void +f6 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (-a, b, c); + res[1] = __builtin_fma (-a, d, e); +} + +void +f7 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (a, -b, c); + res[1] = __builtin_fmaf (d, -b, e); +} + +void +f8 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (a, -b, c); + res[1] = __builtin_fma (d, -b, e); +} + +float +f9 (float a, float b, float c) +{ + return -__builtin_fmaf (a, b, -c); +} + +double +f10 (double a, double b, double c) +{ + return -__builtin_fma (a, b, -c); +} + +/* { dg-final { scan-tree-dump-times { = FNMA \(} 14 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c new file mode 100644 index 00000000000..97b1bbd9f63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-7.c @@ -0,0 +1,67 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return 
__builtin_fmaf (-a, b, -c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (-a, b, -c); +} + +float +f3 (float a, float b, float c) +{ + return __builtin_fmaf (a, -b, -c); +} + +double +f4 (double a, double b, double c) +{ + return __builtin_fma (a, -b, -c); +} + +void +f5 (float a, float b, float c, float d, float *res) +{ + res[0] = __builtin_fmaf (-a, b, -c); + res[1] = __builtin_fmaf (-a, d, -c); +} + +void +f6 (double a, double b, double c, double d, double *res) +{ + res[0] = __builtin_fma (-a, b, -c); + res[1] = __builtin_fma (-a, d, -c); +} + +void +f7 (float a, float b, float c, float d, float *res) +{ + res[0] = __builtin_fmaf (a, -b, -c); + res[1] = __builtin_fmaf (d, -b, -c); +} + +void +f8 (double a, double b, double c, double d, double *res) +{ + res[0] = __builtin_fma (a, -b, -c); + res[1] = __builtin_fma (d, -b, -c); +} + +float +f9 (float a, float b, float c) +{ + return -__builtin_fmaf (a, b, c); +} + +double +f10 (double a, double b, double c) +{ + return -__builtin_fma (a, b, c); +} + +/* { dg-final { scan-tree-dump-times { = FNMS \(} 14 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/gimplefe-26.c b/gcc/testsuite/gcc.dg/gimplefe-26.c deleted file mode 100644 index bc2f3b1d4ca..00000000000 --- a/gcc/testsuite/gcc.dg/gimplefe-26.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do compile { target c99_runtime } } */ -/* { dg-options "-O -fgimple -fdump-tree-ssa-gimple" } */ - -#define foo(type, num) \ -type __GIMPLE () foo_##num (type a, type b, type c) \ -{ \ - type t0; \ - t0_1 = __FMA (a, b, c); \ - return t0_1; \ -} - -foo(float, 1) -foo(double, 2) -foo(long double, 3) - -/* { dg-final { scan-tree-dump-times "__FMA" 3 "ssa" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-3.c b/gcc/testsuite/gcc.dg/graphite/interchange-3.c index 4aec824183a..cb93f5d0920 100644 --- a/gcc/testsuite/gcc.dg/graphite/interchange-3.c +++ b/gcc/testsuite/gcc.dg/graphite/interchange-3.c @@ -47,4 +47,4 @@ 
main (void) return 0; } -/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +/* { dg-final { scan-tree-dump "tiled" "graphite" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-7.c b/gcc/testsuite/gcc.dg/graphite/interchange-7.c index 81a6d832327..81a0a4daf55 100644 --- a/gcc/testsuite/gcc.dg/graphite/interchange-7.c +++ b/gcc/testsuite/gcc.dg/graphite/interchange-7.c @@ -46,4 +46,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +/* { dg-final { scan-tree-dump "tiled" "graphite" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-9.c b/gcc/testsuite/gcc.dg/graphite/interchange-9.c index 88a357893e9..75d269e4527 100644 --- a/gcc/testsuite/gcc.dg/graphite/interchange-9.c +++ b/gcc/testsuite/gcc.dg/graphite/interchange-9.c @@ -44,4 +44,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +/* { dg-final { scan-tree-dump "tiled" "graphite" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c index cc108c2bbc3..fb36afe003e 100644 --- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c +++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c @@ -45,4 +45,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +/* { dg-final { scan-tree-dump "tiled" "graphite" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index c7a3b4dbf2f..8859138113a 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -62,6 +62,13 @@ set plugin_test_list [list \ { start_unit_plugin.c start_unit-test-1.c } \ { finish_unit_plugin.c finish_unit-test-1.c } \ { wide-int_plugin.c wide-int-test-1.c } \ + { poly-int-01_plugin.c poly-int-test-1.c } \ + { poly-int-02_plugin.c poly-int-test-1.c } \ + { poly-int-03_plugin.c poly-int-test-1.c } \ + { 
poly-int-04_plugin.c poly-int-test-1.c } \ + { poly-int-05_plugin.c poly-int-test-1.c } \ + { poly-int-06_plugin.c poly-int-test-1.c } \ + { poly-int-07_plugin.c poly-int-test-1.c } \ { diagnostic_plugin_test_show_locus.c \ diagnostic-test-show-locus-bw.c \ diagnostic-test-show-locus-color.c \ diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c new file mode 100644 index 00000000000..099c9d94c42 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c @@ -0,0 +1,21 @@ +/* Not worth spending time optimizing this. */ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_helper (); + test_poly_coeff_traits (); + test_nonpoly (); + test_endpoint_representable (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c new file mode 100644 index 00000000000..bf103acba8b --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c @@ -0,0 +1,18 @@ +/* Not worth spending time optimizing this. */ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_num_coeffs_core<1> (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c new file mode 100644 index 00000000000..0c08ead8b75 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c @@ -0,0 +1,18 @@ +/* Not worth spending time optimizing this. 
*/ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_num_coeffs_extra<1> (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c new file mode 100644 index 00000000000..8b0a5f91fb4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c @@ -0,0 +1,18 @@ +/* Not worth spending time optimizing this. */ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_num_coeffs_core<2> (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c new file mode 100644 index 00000000000..62493118fe4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c @@ -0,0 +1,18 @@ +/* Not worth spending time optimizing this. */ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_num_coeffs_extra<2> (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c new file mode 100644 index 00000000000..ee4308c26bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c @@ -0,0 +1,26 @@ +/* Not worth spending time optimizing this. 
*/ +/* { dg-options "-O0" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_signed_2<int> (); + test_signed_2<HOST_WIDE_INT> (); + test_signed_2<offset_int> (); + test_signed_2<widest_int> (); + + test_ordered_2<unsigned short> (); + test_ordered_2<unsigned int> (); + test_ordered_2<unsigned HOST_WIDE_INT> (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c new file mode 100644 index 00000000000..e3203d9f3e1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c @@ -0,0 +1,18 @@ +/* Not worth spending time optimizing this. */ +/* { dg-options "-O" } */ + +#include "config.h" +#include "gcc-plugin.h" +#include "system.h" +#include "coretypes.h" +#include "poly-int-tests.h" + +int plugin_is_GPL_compatible; + +int +plugin_init (struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) +{ + test_num_coeffs_core<3> (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c b/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c new file mode 100644 index 00000000000..fe284d59433 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +int +main (int argc, char **argv) +{ + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-tests.h b/gcc/testsuite/gcc.dg/plugin/poly-int-tests.h new file mode 100644 index 00000000000..b7a93856003 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/poly-int-tests.h @@ -0,0 +1,4778 @@ +/* This file contains templated tests that are then instantiated in + multiple plugin tests, in order to reduce the size of each test. 
*/ + +#define ASSERT_FALSE(X) gcc_assert (!(X)) +#define ASSERT_TRUE(X) gcc_assert (X) +#define ASSERT_EQ(X, Y) gcc_assert ((X) == (Y)) +#define ASSERT_MUST_EQ(X, Y) gcc_assert (must_eq (X, Y)) +#define ASSERT_MAY_NE(X, Y) gcc_assert (may_ne (X, Y)) + +/* make (X) converts an X of type int into T, using an arbitrary + precision for wide_int. It passes other types of X through as-is. */ +template<typename T> +struct coeff_helper +{ + static inline const T &make (const T &x) { return x; } +}; + +template<> +struct coeff_helper<wide_int> +{ + template<typename T> + static inline const T &make (const T &x) { return x; } + static inline wide_int make (int i) { return wi::shwi (i, 77); } +}; + +/* make (C1, C2, C3) constructs a T using coefficients from C1, C2 and C3, + picking only enough to fill the T. */ +template<typename T> +struct poly_helper +{ + typedef typename poly_int_traits<T>::coeff_type C; + template<typename T1, typename T2, typename T3> + static T make (const T1 &a, const T2 &b, const T3 &c); +}; + +template<typename T> +template<typename T1, typename T2, typename T3> +inline T +poly_helper<T>::make (const T1 &a, const T2 &b, const T3 &c) +{ + T res; + res = coeff_helper<C>::make (a); + if (poly_int_traits<T>::num_coeffs >= 2) + res.coeffs[1] = coeff_helper<C>::make (b); + if (poly_int_traits<T>::num_coeffs >= 3) + res.coeffs[2] = coeff_helper<C>::make (c); + return res; +} + +/* Test the helper, */ + +static void +test_helper () +{ + typedef poly_helper< poly_int<1, int> > p1; + typedef poly_helper< poly_int<2, int> > p2; + typedef poly_helper< poly_int<3, int> > p3; + + ASSERT_MAY_NE (p1::make (1, 2, 3), 0); + ASSERT_MUST_EQ (p1::make (1, 2, 3) - p1::make (1, 0, 0), 0); + ASSERT_MUST_EQ (p1::make (1, 2, 3) - p1::make (1, 2, 0), 0); + ASSERT_MUST_EQ (p1::make (1, 2, 3) - p1::make (1, 2, 3), 0); + + ASSERT_MAY_NE (p2::make (1, 2, 3), 0); + ASSERT_MAY_NE (p2::make (1, 2, 3) - p2::make (1, 0, 0), 0); + ASSERT_MUST_EQ (p2::make (1, 2, 3) - p2::make (1, 2, 
0), 0); + ASSERT_MUST_EQ (p2::make (1, 2, 3) - p2::make (1, 2, 3), 0); + + ASSERT_MAY_NE (p3::make (1, 2, 3), 0); + ASSERT_MAY_NE (p3::make (1, 2, 3) - p3::make (1, 0, 0), 0); + ASSERT_MAY_NE (p3::make (1, 2, 3) - p3::make (1, 2, 0), 0); + ASSERT_MUST_EQ (p3::make (1, 2, 3) - p3::make (1, 2, 3), 0); +} + +/* Test poly_coeff_traits. */ + +static void +test_poly_coeff_traits () +{ + ASSERT_EQ (poly_coeff_traits<unsigned short>::signedness, 0); + ASSERT_EQ (poly_coeff_traits<unsigned short>::max_value, 0xffff); + + ASSERT_EQ (poly_coeff_traits<int>::signedness, 1); + ASSERT_EQ (poly_coeff_traits<int>::max_value, INT_MAX); + + ASSERT_EQ (poly_coeff_traits<unsigned int>::signedness, 0); + ASSERT_EQ (poly_coeff_traits<unsigned int>::max_value, UINT_MAX); + + ASSERT_EQ (poly_coeff_traits<HOST_WIDE_INT>::signedness, 1); + ASSERT_EQ (poly_coeff_traits<HOST_WIDE_INT>::max_value, HOST_WIDE_INT_MAX); + + ASSERT_EQ (poly_coeff_traits<unsigned HOST_WIDE_INT>::signedness, 0); + ASSERT_EQ (poly_coeff_traits<unsigned HOST_WIDE_INT>::max_value, + HOST_WIDE_INT_M1U); + + ASSERT_EQ (poly_coeff_traits<wide_int>::signedness, -1); + ASSERT_EQ (poly_coeff_traits<offset_int>::signedness, 1); + ASSERT_EQ (poly_coeff_traits<widest_int>::signedness, 1); +} + +/* Test poly_int_traits. */ + +template<unsigned int N, typename C, typename T> +static void +test_poly_int_traits () +{ + /* Check the properties of poly_int_traits<C>. */ + ASSERT_FALSE (poly_int_traits<C>::is_poly); + ASSERT_EQ (poly_int_traits<C>::num_coeffs, 1); + ASSERT_EQ ((C *) 0 - (typename poly_int_traits<C>::coeff_type *) 0, 0); + + /* Check the properties of poly_int_traits<T>. */ + ASSERT_TRUE (poly_int_traits<T>::is_poly); + ASSERT_EQ (poly_int_traits<T>::num_coeffs, N); + ASSERT_EQ ((C *) 0 - (typename poly_int_traits<T>::coeff_type *) 0, 0); +} + +/* Test the handling of constants. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_constants () +{ + typedef coeff_helper<C> ch; + T zero, one, two; + poly_int<N, unsigned char> two_uc = 2; + + /* Test operator = on C. */ + zero = ch::make (0); + one = ch::make (1); + two = ch::make (2); + + /* Basic tests of must and may_ne. */ + ASSERT_MUST_EQ (zero, ch::make (0)); + ASSERT_MAY_NE (one, ch::make (0)); + ASSERT_MAY_NE (two, ch::make (0)); + ASSERT_MUST_EQ (ch::make (0), zero); + ASSERT_MAY_NE (ch::make (0), one); + ASSERT_MAY_NE (ch::make (0), two); + ASSERT_MUST_EQ (zero, zero); + ASSERT_MAY_NE (one, zero); + ASSERT_MAY_NE (two, zero); + + ASSERT_MAY_NE (zero, ch::make (1)); + ASSERT_MUST_EQ (one, ch::make (1)); + ASSERT_MAY_NE (two, ch::make (1)); + ASSERT_MAY_NE (ch::make (1), zero); + ASSERT_MUST_EQ (ch::make (1), one); + ASSERT_MAY_NE (ch::make (1), two); + ASSERT_MAY_NE (zero, one); + ASSERT_MUST_EQ (one, one); + ASSERT_MAY_NE (two, one); + + ASSERT_MAY_NE (zero, ch::make (2)); + ASSERT_MAY_NE (one, ch::make (2)); + ASSERT_MUST_EQ (two, ch::make (2)); + ASSERT_MAY_NE (ch::make (2), zero); + ASSERT_MAY_NE (ch::make (2), one); + ASSERT_MUST_EQ (ch::make (2), two); + ASSERT_MAY_NE (zero, two); + ASSERT_MAY_NE (one, two); + ASSERT_MUST_EQ (two, two); + + ASSERT_MAY_NE (zero, two_uc); + ASSERT_MAY_NE (one, two_uc); + ASSERT_MUST_EQ (two, two_uc); + ASSERT_MAY_NE (two_uc, zero); + ASSERT_MAY_NE (two_uc, one); + ASSERT_MUST_EQ (two_uc, two); +} + +/* Test operator +=. */ + +template<unsigned int N, typename C, typename T> +static void +test_plus_equals () +{ + typedef poly_helper<T> ph; + + /* Test += on int. */ + T add_cm = ph::make (17, 11, 9); + add_cm += 14; + ASSERT_MUST_EQ (add_cm, ph::make (31, 11, 9)); + + /* Test += on T. */ + T add_pm = ph::make (100, 44, 11); + add_pm += ph::make (1, 2, 3); + ASSERT_MUST_EQ (add_pm, ph::make (101, 46, 14)); +} + +/* Test operator -=. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_minus_equals () +{ + typedef poly_helper<T> ph; + + /* Test -= on int. */ + T sub_cm = ph::make (82, 13, 61); + sub_cm -= 76; + ASSERT_MUST_EQ (sub_cm, ph::make (6, 13, 61)); + + /* Test -= on T. */ + T sub_pm = ph::make (82, 13, 61); + sub_pm -= ph::make (19, 12, 14); + ASSERT_MUST_EQ (sub_pm, ph::make (63, 1, 47)); +} + +/* Test operator *=. */ + +template<unsigned int N, typename C, typename T> +static void +test_times_equals () +{ + typedef poly_helper<T> ph; + + /* Test *= on int. */ + T mul_cm = ph::make (11, 22, 33); + mul_cm *= 3; + ASSERT_MUST_EQ (mul_cm, ph::make (33, 66, 99)); +} + +/* Test operator <<=. */ + +template<unsigned int N, typename C, typename T> +static void +test_shl_equals () +{ + typedef poly_helper<T> ph; + + /* Test <<= on int. */ + T shl_cm = ph::make (10, 11, 13); + shl_cm <<= 2; + ASSERT_MUST_EQ (shl_cm, ph::make (40, 44, 52)); +} + +/* Test is_constant. */ + +template<unsigned int N, typename C, typename T> +static void +test_is_constant () +{ + typedef poly_helper<T> ph; + + /* Test is_constant without arguments. */ + ASSERT_TRUE (ph::make (1, 0, 0).is_constant ()); + ASSERT_EQ (ph::make (2, 0, 1).is_constant (), N <= 2); + ASSERT_EQ (ph::make (3, 1, 0).is_constant (), N == 1); + + /* Test is_constant with an argument. */ + C const_value; + ASSERT_TRUE (ph::make (1, 0, 0).is_constant (&const_value)); + ASSERT_EQ (const_value, 1); + ASSERT_EQ (ph::make (2, 0, 1).is_constant (&const_value), N <= 2); + ASSERT_EQ (const_value, N <= 2 ? 2 : 1); + ASSERT_EQ (ph::make (3, 1, 0).is_constant (&const_value), N == 1); + ASSERT_EQ (const_value, 4 - N); +} + +/* Test to_constant. */ + +template<unsigned int N, typename C, typename T> +static void +test_to_constant () +{ + typedef poly_helper<T> ph; + + ASSERT_TRUE (ph::make (1, 0, 0).to_constant () == 1); + ASSERT_TRUE (ph::make (111, 0, 0).to_constant () == 111); +} + +/* Test addition, both via operators and wi::. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_addition () +{ + typedef poly_helper<T> ph; + + /* Test +. */ + ASSERT_MUST_EQ (ph::make (55, 43, 30) + 1, + ph::make (56, 43, 30)); + ASSERT_MUST_EQ (100 + ph::make (5, 15, 26), + ph::make (105, 15, 26)); + ASSERT_MUST_EQ (ph::make (7, 100, 41) + ph::make (96, 9, 21), + ph::make (103, 109, 62)); + + /* Test wi::add. */ + ASSERT_MUST_EQ (wi::add (ph::make (55, 43, 30), 1), + ph::make (56, 43, 30)); + ASSERT_MUST_EQ (wi::add (100, ph::make (5, 15, 26)), + ph::make (105, 15, 26)); + ASSERT_MUST_EQ (wi::add (ph::make (7, 100, 41), ph::make (96, 9, 21)), + ph::make (103, 109, 62)); +} + +/* Test subtraction, both via operators and wi::. */ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_subtraction () +{ + typedef poly_helper<T> ph; + typedef poly_helper< poly_int<N, RC> > rph; + typedef poly_helper< poly_int<N, int> > iph; + + /* Test -. Cs with a rank lower than HOST_WIDE_INT promote to + HOST_WIDE_INT; use rph to capture this. */ + ASSERT_MUST_EQ (ph::make (64, 49, 36) - 42, + rph::make (22, 49, 36)); + ASSERT_MUST_EQ (11 - ph::make (9, 3, 4), + rph::make (2, -3, -4)); + ASSERT_MUST_EQ (ph::make (100, 200, 300) - ph::make (99, 197, 305), + rph::make (1, 3, -5)); + + /* Test wi::sub. Primitive Cs promote to widest_int; use iph to capture + this. */ + ASSERT_MUST_EQ (wi::sub (ph::make (64, 49, 36), 42), + iph::make (22, 49, 36)); + ASSERT_MUST_EQ (wi::sub (11, ph::make (9, 3, 4)), + iph::make (2, -3, -4)); + ASSERT_MUST_EQ (wi::sub (ph::make (100, 200, 300), ph::make (99, 197, 305)), + iph::make (1, 3, -5)); +} + +/* Test negation, both via operators and wi::. */ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_negation () +{ + typedef poly_helper<T> ph; + typedef poly_helper< poly_int<N, RC> > rph; + typedef poly_helper< poly_int<N, int> > iph; + + /* Test unary -. 
*/ + ASSERT_MUST_EQ (-ph::make (10, 20, 30), + rph::make (-10, -20, -30)); + + /* Test wi::neg. */ + ASSERT_MUST_EQ (wi::neg (ph::make (10, 20, 30)), + iph::make (-10, -20, -30)); +} + +/* Test multiplication, both via operators and wi::. */ + +template<unsigned int N, typename C, typename T> +static void +test_multiplication () +{ + typedef poly_helper<T> ph; + + /* Test *. */ + ASSERT_MUST_EQ (ph::make (5, 20, 25) * 10, + ph::make (50, 200, 250)); + ASSERT_MUST_EQ (111 * ph::make (7, 6, 5), + ph::make (777, 666, 555)); + + /* Test wi::mul. */ + ASSERT_MUST_EQ (wi::mul (ph::make (5, 20, 25), 10), + ph::make (50, 200, 250)); + ASSERT_MUST_EQ (wi::mul (111, ph::make (7, 6, 5)), + ph::make (777, 666, 555)); +} + +/* Test shift left, both via operators and wi::. */ + +template<unsigned int N, typename C, typename T> +static void +test_shift_left () +{ + typedef poly_helper<T> ph; + + /* Test <<. */ + ASSERT_MUST_EQ (ph::make (1, 20, 300) << 4, + ph::make (16, 320, 4800)); + + /* Test wi::lshift. */ + ASSERT_MUST_EQ (wi::lshift (ph::make (9, 15, 50), 3), + ph::make (72, 120, 400)); +} + +/* Test may_ne. */ + +template<unsigned int N, typename C, typename T> +static void +test_may_ne () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_ne (T, C). */ + ASSERT_EQ (may_ne (ph::make (1, 0, 2), ch::make (1)), N == 3); + ASSERT_EQ (may_ne (ph::make (-11, -2, 0), ch::make (-11)), N >= 2); + ASSERT_TRUE (may_ne (ph::make (199, 0, 0), ch::make (200))); + + /* Test may_ne (C, T). */ + ASSERT_EQ (may_ne (ch::make (-22), ph::make (-22, 0, -1)), N == 3); + ASSERT_EQ (may_ne (ch::make (5), ph::make (5, 4, 0)), N >= 2); + ASSERT_TRUE (may_ne (ch::make (-3), ph::make (-4, 0, 0))); + + /* Test may_ne (T, T). */ + ASSERT_EQ (may_ne (ph::make (1, 3, 5), + ph::make (1, 3, 6)), N == 3); + ASSERT_EQ (may_ne (ph::make (1, 3, 5), + ph::make (1, 4, 5)), N >= 2); + ASSERT_TRUE (may_ne (ph::make (1, 3, 5), + ph::make (0, 3, 5))); +} + +/* Test must_eq. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_must_eq () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_eq (T, C). */ + ASSERT_EQ (must_eq (ph::make (1, 0, 2), ch::make (1)), N <= 2); + ASSERT_EQ (must_eq (ph::make (-11, -2, 0), ch::make (-11)), N == 1); + ASSERT_FALSE (must_eq (ph::make (199, 0, 0), ch::make (200))); + + /* Test must_eq (C, T). */ + ASSERT_EQ (must_eq (ch::make (-22), ph::make (-22, 0, -1)), N <= 2); + ASSERT_EQ (must_eq (ch::make (5), ph::make (5, 4, 0)), N == 1); + ASSERT_FALSE (must_eq (ch::make (-3), ph::make (-4, 0, 0))); + + /* Test must_eq (T, T). */ + ASSERT_EQ (must_eq (ph::make (1, 3, 5), + ph::make (1, 3, 6)), N <= 2); + ASSERT_EQ (must_eq (ph::make (1, 3, 5), + ph::make (1, 4, 5)), N == 1); + ASSERT_FALSE (must_eq (ph::make (1, 3, 5), + ph::make (0, 3, 5))); +} + +/* Test can_align_p. */ + +template<unsigned int N, typename C, typename T> +static void +test_can_align_p () +{ + typedef poly_helper<T> ph; + + ASSERT_TRUE (can_align_p (ph::make (41, 32, 16), 16)); + ASSERT_EQ (can_align_p (ph::make (15, 64, 8), 16), N <= 2); + ASSERT_EQ (can_align_p (ph::make (17, 8, 80), 16), N == 1); + ASSERT_TRUE (can_align_p (ph::make (-39, -64, -32), 32)); + ASSERT_EQ (can_align_p (ph::make (-32, -96, -31), 32), N <= 2); + ASSERT_EQ (can_align_p (ph::make (-31, -31, -128), 32), N == 1); + ASSERT_TRUE (can_align_p (ph::make (17, 0, 0), 16)); + ASSERT_TRUE (can_align_p (ph::make (16, 0, 0), 16)); + ASSERT_TRUE (can_align_p (ph::make (15, 0, 0), 16)); + ASSERT_TRUE (can_align_p (ph::make (-17, 0, 0), 16)); + ASSERT_TRUE (can_align_p (ph::make (-16, 0, 0), 16)); + ASSERT_TRUE (can_align_p (ph::make (-15, 0, 0), 16)); +} + +/* Test can_align_up. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_can_align_up () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + T aligned; + ASSERT_TRUE (can_align_up (ph::make (41, 32, 16), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ph::make (48, 32, 16)); + ASSERT_EQ (can_align_up (ph::make (15, 64, 8), 16, &aligned), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (aligned, ph::make (16, 64, 0)); + ASSERT_EQ (can_align_up (ph::make (17, 8, 80), 16, &aligned), N == 1); + if (N == 1) + ASSERT_MUST_EQ (aligned, ch::make (32)); + ASSERT_TRUE (can_align_up (ph::make (-39, -64, -32), 32, &aligned)); + ASSERT_MUST_EQ (aligned, ph::make (-32, -64, -32)); + ASSERT_EQ (can_align_up (ph::make (-32, -96, -31), 32, &aligned), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (aligned, ph::make (-32, -96, 0)); + ASSERT_EQ (can_align_up (ph::make (-31, -31, -128), 32, &aligned), N == 1); + if (N == 1) + ASSERT_MUST_EQ (aligned, ch::make (0)); + ASSERT_TRUE (can_align_up (ph::make (17, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (32)); + ASSERT_TRUE (can_align_up (ph::make (16, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (16)); + ASSERT_TRUE (can_align_up (ph::make (15, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (16)); + ASSERT_TRUE (can_align_up (ph::make (-17, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (-16)); + ASSERT_TRUE (can_align_up (ph::make (-16, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (-16)); + ASSERT_TRUE (can_align_up (ph::make (-15, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (0)); +} + +/* Test can_align_down. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_can_align_down () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + T aligned; + ASSERT_TRUE (can_align_down (ph::make (41, 32, 16), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ph::make (32, 32, 16)); + ASSERT_EQ (can_align_down (ph::make (15, 64, 8), 16, &aligned), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (aligned, ph::make (0, 64, 0)); + ASSERT_EQ (can_align_down (ph::make (17, 8, 80), 16, &aligned), N == 1); + if (N == 1) + ASSERT_MUST_EQ (aligned, ch::make (16)); + ASSERT_TRUE (can_align_down (ph::make (-39, -64, -32), 32, &aligned)); + ASSERT_MUST_EQ (aligned, ph::make (-64, -64, -32)); + ASSERT_EQ (can_align_down (ph::make (-32, -96, -31), 32, &aligned), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (aligned, ph::make (-32, -96, 0)); + ASSERT_EQ (can_align_down (ph::make (-31, -31, -128), 32, &aligned), N == 1); + if (N == 1) + ASSERT_MUST_EQ (aligned, ch::make (-32)); + ASSERT_TRUE (can_align_down (ph::make (17, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (16)); + ASSERT_TRUE (can_align_down (ph::make (16, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (16)); + ASSERT_TRUE (can_align_down (ph::make (15, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (0)); + ASSERT_TRUE (can_align_down (ph::make (-17, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (-32)); + ASSERT_TRUE (can_align_down (ph::make (-16, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (-16)); + ASSERT_TRUE (can_align_down (ph::make (-15, 0, 0), 16, &aligned)); + ASSERT_MUST_EQ (aligned, ch::make (-16)); +} + +/* Test known_equal_after_align_up. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_known_equal_after_align_up () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (known_equal_after_align_up (ph::make (15, 15, 32), + ph::make (16, 15, 32), 16), N == 1); + ASSERT_EQ (known_equal_after_align_up (ph::make (16, 16, 15), + ph::make (15, 16, 15), 16), N <= 2); + ASSERT_EQ (known_equal_after_align_up (ph::make (15, 16, 32), + ph::make (7, 16, 48), 16), N <= 2); + ASSERT_EQ (known_equal_after_align_up (ph::make (7, 32, 16), + ph::make (15, 48, 16), 16), N == 1); + ASSERT_TRUE (known_equal_after_align_up (ph::make (16, 16, 32), + ph::make (15, 16, 32), 16)); + ASSERT_TRUE (known_equal_after_align_up (ph::make (32, 0, 0), + ph::make (31, 0, 0), 16)); + ASSERT_TRUE (known_equal_after_align_up (ph::make (32, 0, 0), + ph::make (32, 0, 0), 32)); + ASSERT_FALSE (known_equal_after_align_up (ph::make (32, 0, 0), + ph::make (33, 0, 0), 16)); + ASSERT_FALSE (known_equal_after_align_up (ph::make (-31, 0, 0), + ph::make (-32, 0, 0), 16)); + ASSERT_TRUE (known_equal_after_align_up (ph::make (-32, 0, 0), + ph::make (-32, 0, 0), 32)); + ASSERT_TRUE (known_equal_after_align_up (ph::make (-33, 0, 0), + ph::make (-32, 0, 0), 16)); +} + +/* Test known_equal_after_align_down. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_known_equal_after_align_down () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (known_equal_after_align_down (ph::make (17, 15, 32), + ph::make (16, 15, 32), 16), N == 1); + ASSERT_EQ (known_equal_after_align_down (ph::make (16, 16, 15), + ph::make (17, 16, 15), 16), N <= 2); + ASSERT_EQ (known_equal_after_align_down (ph::make (15, 16, 32), + ph::make (7, 16, 48), 16), N <= 2); + ASSERT_EQ (known_equal_after_align_down (ph::make (15, 32, 16), + ph::make (7, 48, 16), 16), N == 1); + ASSERT_TRUE (known_equal_after_align_down (ph::make (16, 16, 32), + ph::make (17, 16, 32), 16)); + ASSERT_FALSE (known_equal_after_align_down (ph::make (32, 0, 0), + ph::make (31, 0, 0), 16)); + ASSERT_TRUE (known_equal_after_align_down (ph::make (32, 0, 0), + ph::make (32, 0, 0), 32)); + ASSERT_TRUE (known_equal_after_align_down (ph::make (32, 0, 0), + ph::make (33, 0, 0), 16)); + ASSERT_TRUE (known_equal_after_align_down (ph::make (-31, 0, 0), + ph::make (-32, 0, 0), 16)); + ASSERT_TRUE (known_equal_after_align_down (ph::make (-32, 0, 0), + ph::make (-32, 0, 0), 32)); + ASSERT_FALSE (known_equal_after_align_down (ph::make (-33, 0, 0), + ph::make (-32, 0, 0), 16)); +} + +/* Test force_align_up. */ + +template<unsigned int N, typename C, typename T> +static void +test_force_align_up () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test force_align_up. 
*/ + ASSERT_MUST_EQ (force_align_up (ph::make (41, 32, 16), 16), + ph::make (48, 32, 16)); + ASSERT_MUST_EQ (force_align_up (ph::make (-39, -64, -32), 32), + ph::make (-32, -64, -32)); + ASSERT_MUST_EQ (force_align_up (ph::make (17, 0, 0), 16), + ch::make (32)); + ASSERT_MUST_EQ (force_align_up (ph::make (16, 0, 0), 16), + ch::make (16)); + ASSERT_MUST_EQ (force_align_up (ph::make (15, 0, 0), 16), + ch::make (16)); + ASSERT_MUST_EQ (force_align_up (ph::make (-17, 0, 0), 16), + ch::make (-16)); + ASSERT_MUST_EQ (force_align_up (ph::make (-16, 0, 0), 16), + ch::make (-16)); + ASSERT_MUST_EQ (force_align_up (ph::make (-15, 0, 0), 16), + ch::make (0)); +} + +/* Test force_align_down. */ + +template<unsigned int N, typename C, typename T> +static void +test_force_align_down () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (force_align_down (ph::make (41, 32, 16), 16), + ph::make (32, 32, 16)); + ASSERT_MUST_EQ (force_align_down (ph::make (-39, -64, -32), 32), + ph::make (-64, -64, -32)); + ASSERT_MUST_EQ (force_align_down (ph::make (17, 0, 0), 16), + ch::make (16)); + ASSERT_MUST_EQ (force_align_down (ph::make (16, 0, 0), 16), + ch::make (16)); + ASSERT_MUST_EQ (force_align_down (ph::make (15, 0, 0), 16), + ch::make (0)); + ASSERT_MUST_EQ (force_align_down (ph::make (-17, 0, 0), 16), + ch::make (-32)); + ASSERT_MUST_EQ (force_align_down (ph::make (-16, 0, 0), 16), + ch::make (-16)); + ASSERT_MUST_EQ (force_align_down (ph::make (-15, 0, 0), 16), + ch::make (-16)); +} + +/* Test aligned_lower_bound. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_aligned_lower_bound () +{ + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (aligned_lower_bound (ph::make (17, 63, 33), 16), + ph::make (16, 48, 32)); + ASSERT_MUST_EQ (aligned_lower_bound (ph::make (11, -33, 64), 32), + ph::make (0, -64, 64)); + ASSERT_MUST_EQ (aligned_lower_bound (ph::make (-9, 16, -31), 8), + ph::make (-16, 16, -32)); + ASSERT_MUST_EQ (aligned_lower_bound (ph::make (-8, -12, 16), 4), + ph::make (-8, -12, 16)); +} + +/* Test aligned_upper_bound. */ + +template<unsigned int N, typename C, typename T> +static void +test_aligned_upper_bound () +{ + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (aligned_upper_bound (ph::make (17, 63, 33), 16), + ph::make (32, 64, 48)); + ASSERT_MUST_EQ (aligned_upper_bound (ph::make (11, -33, 64), 32), + ph::make (32, -32, 64)); + ASSERT_MUST_EQ (aligned_upper_bound (ph::make (-9, 16, -31), 8), + ph::make (-8, 16, -24)); + ASSERT_MUST_EQ (aligned_upper_bound (ph::make (-8, -12, 16), 4), + ph::make (-8, -12, 16)); +} + +/* Test known_misalignment. */ + +template<unsigned int N, typename C, typename T> +static void +test_known_misalignment () +{ + typedef poly_helper<T> ph; + + C misalignment; + ASSERT_TRUE (known_misalignment (ph::make (45, 8, 24), 8, &misalignment)); + ASSERT_EQ (misalignment, 5); + ASSERT_EQ (known_misalignment (ph::make (17, 16, 23), 8, &misalignment), + N <= 2); + ASSERT_EQ (misalignment, N <= 2 ? 1 : 5); + ASSERT_EQ (known_misalignment (ph::make (31, 15, 0), 16, &misalignment), + N == 1); + ASSERT_EQ (misalignment, N == 1 ? 15 : N == 2 ? 1 : 5); + ASSERT_TRUE (known_misalignment (ph::make (-45, -8, -24), 8, &misalignment)); + ASSERT_EQ (misalignment, 3); + ASSERT_TRUE (known_misalignment (ph::make (-11, 0, 0), 32, &misalignment)); + ASSERT_EQ (misalignment, 21); +} + +/* Test force_get_misalignment. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_force_get_misalignment () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (force_get_misalignment (ph::make (45, 8, 24), 8), 5); + ASSERT_EQ (force_get_misalignment (ph::make (17, 16, 24), 8), 1); + ASSERT_EQ (force_get_misalignment (ph::make (31, -16, 0), 16), 15); + ASSERT_EQ (force_get_misalignment (ph::make (-45, -8, -24), 8), 3); + ASSERT_EQ (force_get_misalignment (ph::make (-11, 0, 0), 32), 21); +} + +/* Test known_alignment. */ + +template<unsigned int N, typename C, typename T> +static void +test_known_alignment () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (known_alignment (ph::make (16, 24, 30)), + N == 1 ? 16 : N == 2 ? 8 : 2); + ASSERT_EQ (known_alignment (ph::make (30, 0, 31)), + N <= 2 ? 2 : 1); + ASSERT_EQ (known_alignment (ph::make (20, 16, 24)), 4); + ASSERT_EQ (known_alignment (ph::make (24, 0, 0)), 8); + ASSERT_EQ (known_alignment (ph::make (0, 0, 0)), 0); + ASSERT_EQ (known_alignment (ph::make (0, 12, 0)), + N == 1 ? 0 : 4); + ASSERT_EQ (known_alignment (ph::make (0, 12, 6)), + N == 1 ? 0 : N == 2 ? 4 : 2); + ASSERT_EQ (known_alignment (ph::make (-40, -80, -12)), + N <= 2 ? 8 : 4); +} + +/* Test can_ior_p. */ + +template<unsigned int N, typename C, typename T> +static void +test_can_ior_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + T ior; + ASSERT_TRUE (can_ior_p (ph::make (0x87, 0x60, 0xa0), 0x13, &ior)); + ASSERT_MUST_EQ (ior, ph::make (0x97, 0x60, 0xa0)); + ASSERT_EQ (can_ior_p (ph::make (9, 96, 48), 28, &ior), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (ior, ph::make (29, 96, 0)); + ASSERT_EQ (can_ior_p (ph::make (0x81, 0x20, 0), 0x44, &ior), N == 1); + if (N == 1) + ASSERT_MUST_EQ (ior, ch::make (0xc5)); +} + +/* Test may_eq for poly_int<2, C>. */ + +template<typename C> +static void +test_may_eq_2 () +{ + typedef poly_int<2, C> T; + + /* Test may_eq (T, C). 
*/ + ASSERT_TRUE (may_eq (T (1, 4), 41)); + ASSERT_FALSE (may_eq (T (1, 4), 42)); + ASSERT_FALSE (may_eq (T (1, 4), 40)); + ASSERT_TRUE (may_eq (T (1, 4), 1)); + ASSERT_FALSE (may_eq (T (1, 4), 0)); + ASSERT_FALSE (may_eq (T (1, 4), 2)); + + /* Test may_eq (C, T). */ + ASSERT_TRUE (may_eq (20, T (5, 3))); + ASSERT_FALSE (may_eq (21, T (5, 3))); + ASSERT_FALSE (may_eq (19, T (5, 3))); + ASSERT_TRUE (may_eq (5, T (5, 3))); + ASSERT_FALSE (may_eq (2, T (5, 3))); + ASSERT_FALSE (may_eq (6, T (5, 3))); + + /* Test may_eq (T, T). */ + ASSERT_TRUE (may_eq (T (2, 5), T (22, 3))); + ASSERT_FALSE (may_eq (T (3, 5), T (22, 3))); + ASSERT_FALSE (may_eq (T (2, 5), T (23, 3))); + ASSERT_FALSE (may_eq (T (2, 5), T (3, 5))); + ASSERT_TRUE (may_eq (T (10, 3), T (19, 0))); + ASSERT_FALSE (may_eq (T (10, 3), T (20, 0))); + ASSERT_TRUE (may_eq (T (10, 0), T (4, 2))); + ASSERT_FALSE (may_eq (T (11, 0), T (4, 2))); +} + +/* Test must_ne for poly_int<2, C>. */ + +template<typename C> +static void +test_must_ne_2 () +{ + typedef poly_int<2, C> T; + + /* Test must_ne (T, C). */ + ASSERT_FALSE (must_ne (T (1, 4), 41)); + ASSERT_TRUE (must_ne (T (1, 4), 42)); + ASSERT_TRUE (must_ne (T (1, 4), 40)); + ASSERT_FALSE (must_ne (T (1, 4), 1)); + ASSERT_TRUE (must_ne (T (1, 4), 0)); + ASSERT_TRUE (must_ne (T (1, 4), 2)); + + /* Test must_ne (C, T). */ + ASSERT_FALSE (must_ne (20, T (5, 3))); + ASSERT_TRUE (must_ne (21, T (5, 3))); + ASSERT_TRUE (must_ne (19, T (5, 3))); + ASSERT_FALSE (must_ne (5, T (5, 3))); + ASSERT_TRUE (must_ne (2, T (5, 3))); + ASSERT_TRUE (must_ne (6, T (5, 3))); + + /* Test must_ne (T, T). 
*/ + ASSERT_FALSE (must_ne (T (2, 5), T (22, 3))); + ASSERT_TRUE (must_ne (T (3, 5), T (22, 3))); + ASSERT_TRUE (must_ne (T (2, 5), T (23, 3))); + ASSERT_TRUE (must_ne (T (2, 5), T (3, 5))); + ASSERT_FALSE (must_ne (T (10, 3), T (19, 0))); + ASSERT_TRUE (must_ne (T (10, 3), T (20, 0))); + ASSERT_FALSE (must_ne (T (10, 0), T (4, 2))); + ASSERT_TRUE (must_ne (T (11, 0), T (4, 2))); +} + +/* Test may_le for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_may_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_le (T, C). */ + ASSERT_FALSE (may_le (ph::make (7, 5, 4), ch::make (6))); + ASSERT_FALSE (may_le (ph::make (7, 0, 0), ch::make (6))); + ASSERT_TRUE (may_le (ph::make (60, 1, 2), ch::make (60))); + ASSERT_TRUE (may_le (ph::make (60, 0, 0), ch::make (60))); + ASSERT_TRUE (may_le (ph::make (30, 9, 4), ch::make (31))); + ASSERT_TRUE (may_le (ph::make (30, 0, 0), ch::make (31))); + + /* Test may_le (C, T). */ + ASSERT_TRUE (may_le (ch::make (6), ph::make (7, 5, 4))); + ASSERT_TRUE (may_le (ch::make (6), ph::make (7, 0, 0))); + ASSERT_TRUE (may_le (ch::make (60), ph::make (60, 1, 2))); + ASSERT_TRUE (may_le (ch::make (60), ph::make (60, 0, 0))); + ASSERT_EQ (may_le (ch::make (31), ph::make (30, 9, 4)), N >= 2); + ASSERT_EQ (may_le (ch::make (31), ph::make (30, 0, 4)), N == 3); + ASSERT_FALSE (may_le (ch::make (31), ph::make (30, 0, 0))); + + /* Test may_le (T, T). 
*/ + ASSERT_EQ (may_le (ph::make (3, 14, 99), ph::make (2, 15, 100)), N >= 2); + ASSERT_EQ (may_le (ph::make (3, 14, 99), ph::make (2, 13, 100)), N == 3); + ASSERT_EQ (may_le (ph::make (3, 14, 99), ph::make (2, 15, 98)), N >= 2); + ASSERT_FALSE (may_le (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (may_le (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (may_le (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_le (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_le (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (may_le (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_le (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_le (ph::make (1, 14, 99), ph::make (2, 13, 98))); +} + +/* Test may_lt for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_may_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_lt (T, C). */ + ASSERT_FALSE (may_lt (ph::make (7, 5, 4), ch::make (6))); + ASSERT_FALSE (may_lt (ph::make (7, 0, 0), ch::make (6))); + ASSERT_FALSE (may_lt (ph::make (60, 1, 2), ch::make (60))); + ASSERT_FALSE (may_lt (ph::make (60, 0, 0), ch::make (60))); + ASSERT_TRUE (may_lt (ph::make (30, 9, 4), ch::make (31))); + ASSERT_TRUE (may_lt (ph::make (30, 0, 0), ch::make (31))); + + /* Test may_lt (C, T). */ + ASSERT_TRUE (may_lt (ch::make (6), ph::make (7, 5, 4))); + ASSERT_TRUE (may_lt (ch::make (6), ph::make (7, 0, 0))); + ASSERT_EQ (may_lt (ch::make (60), ph::make (60, 1, 2)), N >= 2); + ASSERT_EQ (may_lt (ch::make (60), ph::make (60, 0, 2)), N == 3); + ASSERT_FALSE (may_lt (ch::make (60), ph::make (60, 0, 0))); + ASSERT_EQ (may_lt (ch::make (31), ph::make (30, 9, 4)), N >= 2); + ASSERT_EQ (may_lt (ch::make (31), ph::make (30, 0, 4)), N == 3); + ASSERT_FALSE (may_lt (ch::make (31), ph::make (30, 0, 0))); + + /* Test may_lt (T, T). 
*/ + ASSERT_EQ (may_lt (ph::make (3, 14, 99), ph::make (2, 15, 100)), N >= 2); + ASSERT_EQ (may_lt (ph::make (3, 14, 99), ph::make (2, 13, 100)), N == 3); + ASSERT_EQ (may_lt (ph::make (3, 14, 99), ph::make (2, 15, 98)), N >= 2); + ASSERT_FALSE (may_lt (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (may_lt (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_EQ (may_lt (ph::make (2, 14, 99), ph::make (2, 15, 100)), N >= 2); + ASSERT_EQ (may_lt (ph::make (2, 14, 99), ph::make (2, 13, 100)), N == 3); + ASSERT_EQ (may_lt (ph::make (2, 14, 99), ph::make (2, 15, 98)), N >= 2); + ASSERT_FALSE (may_lt (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (may_lt (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (may_lt (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_lt (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_lt (ph::make (1, 14, 99), ph::make (2, 13, 98))); +} + +/* Test may_ge for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_may_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_ge (T, C). */ + ASSERT_TRUE (may_ge (ph::make (7, 5, 4), ch::make (6))); + ASSERT_TRUE (may_ge (ph::make (7, 0, 0), ch::make (6))); + ASSERT_TRUE (may_ge (ph::make (60, 1, 2), ch::make (60))); + ASSERT_TRUE (may_ge (ph::make (60, 0, 0), ch::make (60))); + ASSERT_EQ (may_ge (ph::make (30, 9, 4), ch::make (31)), N >= 2); + ASSERT_EQ (may_ge (ph::make (30, 0, 4), ch::make (31)), N == 3); + ASSERT_FALSE (may_ge (ph::make (30, 0, 0), ch::make (31))); + + /* Test may_ge (C, T). 
*/ + ASSERT_FALSE (may_ge (ch::make (6), ph::make (7, 5, 4))); + ASSERT_FALSE (may_ge (ch::make (6), ph::make (7, 0, 0))); + ASSERT_TRUE (may_ge (ch::make (60), ph::make (60, 1, 2))); + ASSERT_TRUE (may_ge (ch::make (60), ph::make (60, 0, 0))); + ASSERT_TRUE (may_ge (ch::make (31), ph::make (30, 9, 4))); + ASSERT_TRUE (may_ge (ch::make (31), ph::make (30, 0, 0))); + + /* Test may_ge (T, T). */ + ASSERT_TRUE (may_ge (ph::make (3, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_ge (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_ge (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (may_ge (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_ge (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_ge (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (may_ge (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (may_ge (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (may_ge (ph::make (1, 14, 99), ph::make (2, 15, 98)), N == 3); + ASSERT_EQ (may_ge (ph::make (1, 14, 99), ph::make (2, 13, 100)), N >= 2); + ASSERT_EQ (may_ge (ph::make (1, 14, 99), ph::make (2, 13, 98)), N >= 2); +} + +/* Test may_gt for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_may_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_gt (T, C). */ + ASSERT_TRUE (may_gt (ph::make (7, 5, 4), ch::make (6))); + ASSERT_TRUE (may_gt (ph::make (7, 0, 0), ch::make (6))); + ASSERT_EQ (may_gt (ph::make (60, 1, 2), ch::make (60)), N >= 2); + ASSERT_EQ (may_gt (ph::make (60, 0, 2), ch::make (60)), N == 3); + ASSERT_FALSE (may_gt (ph::make (60, 0, 0), ch::make (60))); + ASSERT_EQ (may_gt (ph::make (30, 9, 4), ch::make (31)), N >= 2); + ASSERT_EQ (may_gt (ph::make (30, 0, 4), ch::make (31)), N == 3); + ASSERT_FALSE (may_gt (ph::make (30, 0, 0), ch::make (31))); + + /* Test may_gt (C, T). 
*/ + ASSERT_FALSE (may_gt (ch::make (6), ph::make (7, 5, 4))); + ASSERT_FALSE (may_gt (ch::make (6), ph::make (7, 0, 0))); + ASSERT_FALSE (may_gt (ch::make (60), ph::make (60, 1, 2))); + ASSERT_FALSE (may_gt (ch::make (60), ph::make (60, 0, 0))); + ASSERT_TRUE (may_gt (ch::make (31), ph::make (30, 9, 4))); + ASSERT_TRUE (may_gt (ch::make (31), ph::make (30, 0, 0))); + + /* Test may_gt (T, T). */ + ASSERT_TRUE (may_gt (ph::make (3, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (may_gt (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (may_gt (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (may_gt (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (may_gt (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (may_gt (ph::make (2, 14, 99), ph::make (2, 15, 98)), N == 3); + ASSERT_EQ (may_gt (ph::make (2, 14, 99), ph::make (2, 13, 100)), N >= 2); + ASSERT_EQ (may_gt (ph::make (2, 14, 99), ph::make (2, 13, 98)), N >= 2); + ASSERT_FALSE (may_gt (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (may_gt (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (may_gt (ph::make (1, 14, 99), ph::make (2, 15, 98)), N == 3); + ASSERT_EQ (may_gt (ph::make (1, 14, 99), ph::make (2, 13, 100)), N >= 2); + ASSERT_EQ (may_gt (ph::make (1, 14, 99), ph::make (2, 13, 98)), N >= 2); +} + +/* Test must_gt for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_must_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_gt (T, C). */ + ASSERT_TRUE (must_gt (ph::make (7, 5, 4), ch::make (6))); + ASSERT_TRUE (must_gt (ph::make (7, 0, 0), ch::make (6))); + ASSERT_FALSE (must_gt (ph::make (60, 1, 2), ch::make (60))); + ASSERT_FALSE (must_gt (ph::make (60, 0, 0), ch::make (60))); + ASSERT_FALSE (must_gt (ph::make (30, 9, 4), ch::make (31))); + ASSERT_FALSE (must_gt (ph::make (30, 0, 0), ch::make (31))); + + /* Test must_gt (C, T). 
*/ + ASSERT_FALSE (must_gt (ch::make (6), ph::make (7, 5, 4))); + ASSERT_FALSE (must_gt (ch::make (6), ph::make (7, 0, 0))); + ASSERT_FALSE (must_gt (ch::make (60), ph::make (60, 1, 2))); + ASSERT_FALSE (must_gt (ch::make (60), ph::make (60, 0, 0))); + ASSERT_EQ (must_gt (ch::make (31), ph::make (30, 9, 4)), N == 1); + ASSERT_EQ (must_gt (ch::make (31), ph::make (30, 0, 4)), N <= 2); + ASSERT_TRUE (must_gt (ch::make (31), ph::make (30, 0, 0))); + + /* Test must_gt (T, T). */ + ASSERT_EQ (must_gt (ph::make (3, 14, 99), ph::make (2, 15, 100)), N == 1); + ASSERT_EQ (must_gt (ph::make (3, 14, 99), ph::make (2, 13, 100)), N <= 2); + ASSERT_EQ (must_gt (ph::make (3, 14, 99), ph::make (2, 15, 98)), N == 1); + ASSERT_TRUE (must_gt (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (must_gt (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (must_gt (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_gt (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_gt (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (must_gt (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_gt (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_gt (ph::make (1, 14, 99), ph::make (2, 13, 98))); +} + +/* Test must_ge for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_must_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_ge (T, C). */ + ASSERT_TRUE (must_ge (ph::make (7, 5, 4), ch::make (6))); + ASSERT_TRUE (must_ge (ph::make (7, 0, 0), ch::make (6))); + ASSERT_TRUE (must_ge (ph::make (60, 1, 2), ch::make (60))); + ASSERT_TRUE (must_ge (ph::make (60, 0, 0), ch::make (60))); + ASSERT_FALSE (must_ge (ph::make (30, 9, 4), ch::make (31))); + ASSERT_FALSE (must_ge (ph::make (30, 0, 0), ch::make (31))); + + /* Test must_ge (C, T). 
*/ + ASSERT_FALSE (must_ge (ch::make (6), ph::make (7, 5, 4))); + ASSERT_FALSE (must_ge (ch::make (6), ph::make (7, 0, 0))); + ASSERT_EQ (must_ge (ch::make (60), ph::make (60, 1, 2)), N == 1); + ASSERT_EQ (must_ge (ch::make (60), ph::make (60, 0, 2)), N <= 2); + ASSERT_TRUE (must_ge (ch::make (60), ph::make (60, 0, 0))); + ASSERT_EQ (must_ge (ch::make (31), ph::make (30, 9, 4)), N == 1); + ASSERT_EQ (must_ge (ch::make (31), ph::make (30, 0, 4)), N <= 2); + ASSERT_TRUE (must_ge (ch::make (31), ph::make (30, 0, 0))); + + /* Test must_ge (T, T). */ + ASSERT_EQ (must_ge (ph::make (3, 14, 99), ph::make (2, 15, 100)), N == 1); + ASSERT_EQ (must_ge (ph::make (3, 14, 99), ph::make (2, 13, 100)), N <= 2); + ASSERT_EQ (must_ge (ph::make (3, 14, 99), ph::make (2, 15, 98)), N == 1); + ASSERT_TRUE (must_ge (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (must_ge (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_EQ (must_ge (ph::make (2, 14, 99), ph::make (2, 15, 100)), N == 1); + ASSERT_EQ (must_ge (ph::make (2, 14, 99), ph::make (2, 13, 100)), N <= 2); + ASSERT_EQ (must_ge (ph::make (2, 14, 99), ph::make (2, 15, 98)), N == 1); + ASSERT_TRUE (must_ge (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (must_ge (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (must_ge (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_ge (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_ge (ph::make (1, 14, 99), ph::make (2, 13, 98))); +} + +/* Test must_lt for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_must_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_lt (T, C). 
*/ + ASSERT_FALSE (must_lt (ph::make (7, 5, 4), ch::make (6))); + ASSERT_FALSE (must_lt (ph::make (7, 0, 0), ch::make (6))); + ASSERT_FALSE (must_lt (ph::make (60, 1, 2), ch::make (60))); + ASSERT_FALSE (must_lt (ph::make (60, 0, 0), ch::make (60))); + ASSERT_EQ (must_lt (ph::make (30, 9, 4), ch::make (31)), N == 1); + ASSERT_EQ (must_lt (ph::make (30, 0, 4), ch::make (31)), N <= 2); + ASSERT_TRUE (must_lt (ph::make (30, 0, 0), ch::make (31))); + + /* Test must_lt (C, T). */ + ASSERT_TRUE (must_lt (ch::make (6), ph::make (7, 5, 4))); + ASSERT_TRUE (must_lt (ch::make (6), ph::make (7, 0, 0))); + ASSERT_FALSE (must_lt (ch::make (60), ph::make (60, 1, 2))); + ASSERT_FALSE (must_lt (ch::make (60), ph::make (60, 0, 0))); + ASSERT_FALSE (must_lt (ch::make (31), ph::make (30, 9, 4))); + ASSERT_FALSE (must_lt (ch::make (31), ph::make (30, 0, 0))); + + /* Test must_lt (T, T). */ + ASSERT_FALSE (must_lt (ph::make (3, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_lt (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_lt (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_FALSE (must_lt (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_lt (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_lt (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (must_lt (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (must_lt (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (must_lt (ph::make (1, 14, 99), ph::make (2, 15, 98)), N <= 2); + ASSERT_EQ (must_lt (ph::make (1, 14, 99), ph::make (2, 13, 100)), N == 1); + ASSERT_EQ (must_lt (ph::make (1, 14, 99), ph::make (2, 13, 98)), N == 1); +} + +/* Test must_le for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_must_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_le (T, C). 
*/ + ASSERT_FALSE (must_le (ph::make (7, 5, 4), ch::make (6))); + ASSERT_FALSE (must_le (ph::make (7, 0, 0), ch::make (6))); + ASSERT_EQ (must_le (ph::make (60, 1, 2), ch::make (60)), N == 1); + ASSERT_EQ (must_le (ph::make (60, 0, 2), ch::make (60)), N <= 2); + ASSERT_TRUE (must_le (ph::make (60, 0, 0), ch::make (60))); + ASSERT_EQ (must_le (ph::make (30, 9, 4), ch::make (31)), N == 1); + ASSERT_EQ (must_le (ph::make (30, 0, 4), ch::make (31)), N <= 2); + ASSERT_TRUE (must_le (ph::make (30, 0, 0), ch::make (31))); + + /* Test must_le (C, T). */ + ASSERT_TRUE (must_le (ch::make (6), ph::make (7, 5, 4))); + ASSERT_TRUE (must_le (ch::make (6), ph::make (7, 0, 0))); + ASSERT_TRUE (must_le (ch::make (60), ph::make (60, 1, 2))); + ASSERT_TRUE (must_le (ch::make (60), ph::make (60, 0, 0))); + ASSERT_FALSE (must_le (ch::make (31), ph::make (30, 9, 4))); + ASSERT_FALSE (must_le (ch::make (31), ph::make (30, 0, 0))); + + /* Test must_le (T, T). */ + ASSERT_FALSE (must_le (ph::make (3, 14, 99), ph::make (2, 15, 100))); + ASSERT_FALSE (must_le (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_FALSE (must_le (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (must_le (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (must_le (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (must_le (ph::make (2, 14, 99), ph::make (2, 15, 98)), N <= 2); + ASSERT_EQ (must_le (ph::make (2, 14, 99), ph::make (2, 13, 100)), N == 1); + ASSERT_EQ (must_le (ph::make (2, 14, 99), ph::make (2, 13, 98)), N == 1); + ASSERT_TRUE (must_le (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (must_le (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (must_le (ph::make (1, 14, 99), ph::make (2, 15, 98)), N <= 2); + ASSERT_EQ (must_le (ph::make (1, 14, 99), ph::make (2, 13, 100)), N == 1); + ASSERT_EQ (must_le (ph::make (1, 14, 99), ph::make (2, 13, 98)), N == 1); +} + +/* Test ordered_p for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_ordered_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_p (T, C). */ + ASSERT_EQ (ordered_p (ph::make (4, 1, 2), ch::make (5)), N == 1); + ASSERT_EQ (ordered_p (ph::make (4, 0, 2), ch::make (5)), N <= 2); + ASSERT_TRUE (ordered_p (ph::make (4, 0, 0), ch::make (5))); + ASSERT_TRUE (ordered_p (ph::make (4, 1, 2), ch::make (4))); + ASSERT_TRUE (ordered_p (ph::make (4, 0, 0), ch::make (4))); + ASSERT_TRUE (ordered_p (ph::make (4, 1, 2), ch::make (3))); + ASSERT_TRUE (ordered_p (ph::make (4, 0, 0), ch::make (3))); + ASSERT_TRUE (ordered_p (ph::make (4, 4, 4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (4, 4, 0), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (4, 0, 4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, -4, -4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, -4, 0), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, 0, -4), ch::make (0))); + + /* Test ordered_p (C, T). */ + ASSERT_EQ (ordered_p (ch::make (5), ph::make (4, 1, 2)), N == 1); + ASSERT_EQ (ordered_p (ch::make (5), ph::make (4, 0, 2)), N <= 2); + ASSERT_TRUE (ordered_p (ch::make (5), ph::make (4, 0, 0))); + ASSERT_TRUE (ordered_p (ch::make (4), ph::make (4, 1, 2))); + ASSERT_TRUE (ordered_p (ch::make (4), ph::make (4, 0, 0))); + ASSERT_TRUE (ordered_p (ch::make (3), ph::make (4, 1, 2))); + ASSERT_TRUE (ordered_p (ch::make (3), ph::make (4, 0, 0))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, 4, 4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, 4, 0))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, 0, 4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, -4, -4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, -4, 0))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, 0, -4))); + + /* Test ordered_p (T, T). 
*/ + ASSERT_EQ (ordered_p (ph::make (3, 14, 99), ph::make (2, 15, 100)), N == 1); + ASSERT_EQ (ordered_p (ph::make (3, 14, 99), ph::make (2, 13, 100)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (3, 14, 99), ph::make (2, 15, 98)), N == 1); + ASSERT_TRUE (ordered_p (ph::make (3, 14, 99), ph::make (2, 14, 99))); + ASSERT_TRUE (ordered_p (ph::make (3, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 14, 100))); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 15, 99))); + ASSERT_EQ (ordered_p (ph::make (2, 14, 99), ph::make (2, 13, 100)), N <= 2); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (ordered_p (ph::make (2, 14, 99), ph::make (2, 15, 98)), N <= 2); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 13, 99))); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 14, 98))); + ASSERT_TRUE (ordered_p (ph::make (2, 14, 99), ph::make (2, 13, 98))); + ASSERT_TRUE (ordered_p (ph::make (1, 14, 99), ph::make (2, 15, 100))); + ASSERT_TRUE (ordered_p (ph::make (1, 14, 99), ph::make (2, 14, 99))); + ASSERT_EQ (ordered_p (ph::make (1, 14, 99), ph::make (2, 15, 98)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (1, 14, 99), ph::make (2, 13, 100)), N == 1); + ASSERT_EQ (ordered_p (ph::make (1, 14, 99), ph::make (2, 13, 98)), N == 1); +} + +/* Test ordered_min for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_ordered_min () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_min (T, C). */ + ASSERT_MUST_EQ (ordered_min (ph::make (4, 0, 0), ch::make (5)), + ch::make (4)); + ASSERT_MUST_EQ (ordered_min (ph::make (12, 0, 0), ch::make (11)), + ch::make (11)); + ASSERT_MUST_EQ (ordered_min (ph::make (12, 6, 4), ch::make (11)), + ch::make (11)); + + /* Test ordered_min (C, T). 
*/ + ASSERT_MUST_EQ (ordered_min (ch::make (5), ph::make (4, 0, 0)), + ch::make (4)); + ASSERT_MUST_EQ (ordered_min (ch::make (11), ph::make (12, 0, 0)), + ch::make (11)); + ASSERT_MUST_EQ (ordered_min (ch::make (11), ph::make (12, 6, 4)), + ch::make (11)); + + /* Test ordered_min (T, T). */ + ASSERT_MUST_EQ (ordered_min (ph::make (4, 6, 14), ph::make (5, 6, 19)), + ph::make (4, 6, 14)); + ASSERT_MUST_EQ (ordered_min (ph::make (4, 9, 17), ph::make (3, 9, 0)), + ph::make (3, 9, 0)); + ASSERT_MUST_EQ (ordered_min (ph::make (-4, -5, 12), ph::make (-3, -5, 12)), + ph::make (-4, -5, 12)); + ASSERT_MUST_EQ (ordered_min (ph::make (4, -9, 6), ph::make (4, -8, 6)), + ph::make (4, -9, 6)); + ASSERT_MUST_EQ (ordered_min (ph::make (5, -1, -14), ph::make (5, -1, -16)), + ph::make (5, -1, -16)); +} + +/* Test ordered_max for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_ordered_max () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_max (T, C). */ + ASSERT_MUST_EQ (ordered_max (ph::make (4, 0, 0), ch::make (5)), + ch::make (5)); + ASSERT_MUST_EQ (ordered_max (ph::make (12, 0, 0), ch::make (11)), + ch::make (12)); + ASSERT_MUST_EQ (ordered_max (ph::make (12, 6, 4), ch::make (11)), + ph::make (12, 6, 4)); + + /* Test ordered_max (C, T). */ + ASSERT_MUST_EQ (ordered_max (ch::make (5), ph::make (4, 0, 0)), + ch::make (5)); + ASSERT_MUST_EQ (ordered_max (ch::make (11), ph::make (12, 0, 0)), + ch::make (12)); + ASSERT_MUST_EQ (ordered_max (ch::make (11), ph::make (12, 6, 4)), + ph::make (12, 6, 4)); + + /* Test ordered_max (T, T). 
*/ + ASSERT_MUST_EQ (ordered_max (ph::make (4, 6, 14), ph::make (5, 6, 19)), + ph::make (5, 6, 19)); + ASSERT_MUST_EQ (ordered_max (ph::make (4, 9, 17), ph::make (3, 9, 0)), + ph::make (4, 9, 17)); + ASSERT_MUST_EQ (ordered_max (ph::make (-4, -5, 12), ph::make (-3, -5, 12)), + ph::make (-3, -5, 12)); + ASSERT_MUST_EQ (ordered_max (ph::make (4, -9, 6), ph::make (4, -8, 6)), + ph::make (4, -8, 6)); + ASSERT_MUST_EQ (ordered_max (ph::make (5, -1, -14), ph::make (5, -1, -16)), + ph::make (5, -1, -14)); +} + +/* Test constant_lower_bound for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_constant_lower_bound () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (constant_lower_bound (ph::make (4, 1, 2)), 4); + ASSERT_EQ (constant_lower_bound (ph::make (5, 0, 1)), 5); + ASSERT_EQ (constant_lower_bound (ph::make (6, 1, 0)), 6); + ASSERT_EQ (constant_lower_bound (ph::make (7, 0, 0)), 7); +} + +/* Test lower_bound for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_lower_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test lower_bound (T, C). */ + ASSERT_MUST_EQ (lower_bound (ph::make (7, 2, 15), ch::make (4)), + ch::make (4)); + ASSERT_MUST_EQ (lower_bound (ph::make (100, 5, 50), ch::make (200)), + ch::make (100)); + + /* Test lower_bound (C, T). */ + ASSERT_MUST_EQ (lower_bound (ch::make (4), ph::make (7, 2, 15)), + ch::make (4)); + ASSERT_MUST_EQ (lower_bound (ch::make (200), ph::make (100, 5, 50)), + ch::make (100)); + + /* Test lower_bound (T, T). */ + ASSERT_MUST_EQ (lower_bound (ph::make (7, 2, 15), ph::make (5, 19, 14)), + ph::make (5, 2, 14)); + ASSERT_MUST_EQ (lower_bound (ph::make (100, 5, 50), ph::make (200, 0, 80)), + ph::make (100, 0, 50)); +} + +/* Test upper_bound for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_upper_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test upper_bound (T, C). */ + ASSERT_MUST_EQ (upper_bound (ph::make (7, 2, 15), ch::make (4)), + ph::make (7, 2, 15)); + ASSERT_MUST_EQ (upper_bound (ph::make (100, 5, 50), ch::make (200)), + ph::make (200, 5, 50)); + + /* Test upper_bound (C, T). */ + ASSERT_MUST_EQ (upper_bound (ch::make (4), ph::make (7, 2, 15)), + ph::make (7, 2, 15)); + ASSERT_MUST_EQ (upper_bound (ch::make (200), ph::make (100, 5, 50)), + ph::make (200, 5, 50)); + + /* Test upper_bound (T, T). */ + ASSERT_MUST_EQ (upper_bound (ph::make (7, 2, 15), ph::make (5, 19, 14)), + ph::make (7, 19, 15)); + ASSERT_MUST_EQ (upper_bound (ph::make (100, 5, 50), ph::make (200, 0, 80)), + ph::make (200, 5, 80)); +} + +/* Test compare_sizes_for_sort for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_compare_sizes_for_sort () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (compare_sizes_for_sort (ph::make (5, 10, 8), + ph::make (7, 9, 11)), + N == 2 ? 1 : -1); + ASSERT_EQ (compare_sizes_for_sort (ph::make (5, 9, 8), + ph::make (7, 9, 11)), + -1); + ASSERT_EQ (compare_sizes_for_sort (ph::make (19, 9, 13), + ph::make (7, 9, 13)), + 1); + ASSERT_EQ (compare_sizes_for_sort (ph::make (5, 9, 7), + ph::make (5, 10, 5)), + N == 1 ? 0 : N == 2 ? -1 : 1); + ASSERT_EQ (compare_sizes_for_sort (ph::make (10, 9, 10), + ph::make (10, 9, 6)), + N <= 2 ? 0 : 1); + ASSERT_EQ (compare_sizes_for_sort (ph::make (10, 9, 6), + ph::make (10, 9, 6)), + 0); +} + +/* Test force_align_up_and_div for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_force_align_up_and_div () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (41, 32, 16), 16), + ph::make (3, 2, 1)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (-39, -64, -32), 32), + ph::make (C (-32) / 32, C (-64) / 32, C (-32) / 32)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (17, 0, 0), 16), + ch::make (2)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (16, 0, 0), 16), + ch::make (1)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (15, 0, 0), 16), + ch::make (1)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (-17, 0, 0), 16), + ch::make (C (-16) / 16)); + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (-16, 0, 0), 16), + ch::make (C (-16) / 16)); + /* For unsigned short C this gives 0x10000 / 16. */ + ASSERT_MUST_EQ (force_align_up_and_div (ph::make (-15, 0, 0), 16), + ch::make ((C (-1) + 1) / 16)); +} + +/* Test force_align_down_and_div for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_force_align_down_and_div () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (41, 32, 16), 16), + ph::make (2, 2, 1)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (-39, -64, -32), 32), + ph::make (C (-64) / 32, C (-64) / 32, C (-32) / 32)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (17, 0, 0), 16), + ch::make (1)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (16, 0, 0), 16), + ch::make (1)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (15, 0, 0), 16), + ch::make (0)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (-17, 0, 0), 16), + ch::make (C (-32) / 16)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (-16, 0, 0), 16), + ch::make (C (-16) / 16)); + ASSERT_MUST_EQ (force_align_down_and_div (ph::make (-15, 0, 0), 16), + ch::make (C (-16) / 16)); +} + +/* Test constant_multiple_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_constant_multiple_p () +{ + typedef poly_helper<T> ph; + + /* Test constant_multiple_p (T, C). */ + C const_multiple; + ASSERT_TRUE (constant_multiple_p (ph::make (15, 0, 0), 5, + &const_multiple)); + ASSERT_EQ (const_multiple, 3); + ASSERT_FALSE (constant_multiple_p (ph::make (16, 0, 0), 5, + &const_multiple)); + ASSERT_FALSE (constant_multiple_p (ph::make (14, 5, 5), 5, + &const_multiple)); + ASSERT_EQ (constant_multiple_p (ph::make (44, 0, 55), 11, + &const_multiple), N <= 2); + ASSERT_EQ (const_multiple, N <= 2 ? 4 : 3); + ASSERT_EQ (constant_multiple_p (ph::make (30, 30, 0), 6, + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 5 : N == 2 ? 4 : 3); + ASSERT_TRUE (constant_multiple_p (ph::make (0, 0, 0), 5, + &const_multiple)); + + /* Test constant_multiple_p (C, T). 
*/ + ASSERT_TRUE (constant_multiple_p (15, ph::make (5, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, 3); + ASSERT_FALSE (constant_multiple_p (16, ph::make (5, 0, 0), + &const_multiple)); + ASSERT_FALSE (constant_multiple_p (14, ph::make (5, 5, 5), + &const_multiple)); + ASSERT_EQ (constant_multiple_p (44, ph::make (11, 0, 4), + &const_multiple), N <= 2); + ASSERT_EQ (const_multiple, N <= 2 ? 4 : 3); + ASSERT_EQ (constant_multiple_p (30, ph::make (6, 6, 6), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 5 : N == 2 ? 4 : 3); + ASSERT_TRUE (constant_multiple_p (0, ph::make (5, 4, 11), + &const_multiple)); + ASSERT_EQ (const_multiple, 0); + + /* Test constant_multiple_p (T, T). */ + ASSERT_TRUE (constant_multiple_p (ph::make (5, 15, 25), + ph::make (1, 3, 5), + &const_multiple)); + ASSERT_EQ (const_multiple, 5); + ASSERT_EQ (constant_multiple_p (ph::make (18, 30, 7), + ph::make (6, 10, 2), + &const_multiple), N <= 2); + ASSERT_EQ (const_multiple, N <= 2 ? 3 : 5); + ASSERT_EQ (constant_multiple_p (ph::make (54, 19, 0), + ph::make (9, 3, 0), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 6 : N == 2 ? 3: 5); + ASSERT_TRUE (constant_multiple_p (ph::make (120, 0, 90), + ph::make (12, 0, 9), + &const_multiple)); + ASSERT_EQ (const_multiple, 10); + ASSERT_EQ (constant_multiple_p (ph::make (110, 1, 22), + ph::make (10, 0, 2), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 11 : 10); + ASSERT_EQ (constant_multiple_p (ph::make (120, -1, 22), + ph::make (10, 0, 2), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 12 : 10); + ASSERT_EQ (constant_multiple_p (ph::make (130, 0, 26), + ph::make (10, 1, 2), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 13 : 10); + ASSERT_EQ (constant_multiple_p (ph::make (140, 0, 28), + ph::make (10, -1, 2), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? 
14 : 10); + ASSERT_FALSE (constant_multiple_p (ph::make (89, 0, 0), + ph::make (11, 0, 0), + &const_multiple)); + ASSERT_TRUE (constant_multiple_p (ph::make (88, 0, 0), + ph::make (11, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, 8); + ASSERT_FALSE (constant_multiple_p (ph::make (87, 0, 0), + ph::make (11, 0, 0), + &const_multiple)); + ASSERT_TRUE (constant_multiple_p (ph::make (35, 63, 0), + ph::make (5, 9, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, 7); + ASSERT_TRUE (constant_multiple_p (ph::make (0, 0, 0), + ph::make (11, -24, 25), + &const_multiple)); + ASSERT_EQ (const_multiple, 0); +} + +/* Test multiple_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_multiple_p () +{ + typedef poly_helper<T> ph; + + /* Test multiple_p (T, C). */ + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), 5)); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), 5)); + ASSERT_FALSE (multiple_p (ph::make (14, 5, 5), 5)); + ASSERT_TRUE (multiple_p (ph::make (44, 0, 55), 11)); + ASSERT_TRUE (multiple_p (ph::make (30, 30, 0), 6)); + ASSERT_TRUE (multiple_p (ph::make (30, 35, 45), 5)); + ASSERT_EQ (multiple_p (ph::make (30, 35, 44), 5), N <= 2); + ASSERT_EQ (multiple_p (ph::make (30, 34, 45), 5), N == 1); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), 5)); + + /* Test multiple_p (C, T). */ + ASSERT_TRUE (multiple_p (15, ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (16, ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (14, ph::make (5, 5, 5))); + ASSERT_EQ (multiple_p (44, ph::make (11, 0, 4)), N <= 2); + ASSERT_EQ (multiple_p (30, ph::make (6, 6, 6)), N == 1); + ASSERT_TRUE (multiple_p (0, ph::make (5, 4, 11))); + + /* Test multiple_p (T, T). 
*/ + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), + ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), + ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (14, 5, 5), + ph::make (5, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (44, 0, 55), + ph::make (11, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (30, 30, 0), + ph::make (6, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (30, 35, 45), + ph::make (5, 0, 0))); + ASSERT_EQ (multiple_p (ph::make (30, 35, 44), + ph::make (5, 0, 0)), N <= 2); + ASSERT_EQ (multiple_p (ph::make (30, 34, 45), + ph::make (5, 0, 0)), N == 1); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (5, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), + ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), + ph::make (5, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (14, 0, 0), + ph::make (5, 5, 5))); + ASSERT_EQ (multiple_p (ph::make (44, 0, 0), + ph::make (11, 0, 4)), N <= 2); + ASSERT_EQ (multiple_p (ph::make (30, 0, 0), + ph::make (6, 6, 6)), N == 1); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (5, 4, 11))); + ASSERT_TRUE (multiple_p (ph::make (5, 15, 25), + ph::make (1, 3, 5))); + ASSERT_EQ (multiple_p (ph::make (18, 30, 7), + ph::make (6, 10, 2)), N <= 2); + ASSERT_EQ (multiple_p (ph::make (54, 19, 0), + ph::make (9, 3, 0)), N == 1); + ASSERT_TRUE (multiple_p (ph::make (120, 0, 90), + ph::make (12, 0, 9))); + ASSERT_EQ (multiple_p (ph::make (110, 1, 22), + ph::make (10, 0, 2)), N == 1); + ASSERT_EQ (multiple_p (ph::make (120, -1, 22), + ph::make (10, 0, 2)), N == 1); + ASSERT_EQ (multiple_p (ph::make (130, 0, 26), + ph::make (10, 1, 2)), N == 1); + ASSERT_EQ (multiple_p (ph::make (140, 0, 28), + ph::make (10, -1, 2)), N == 1); + ASSERT_FALSE (multiple_p (ph::make (89, 0, 0), + ph::make (11, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (88, 0, 0), + ph::make (11, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (87, 0, 0), + ph::make (11, 0, 0))); + ASSERT_TRUE (multiple_p 
(ph::make (35, 63, 0), + ph::make (5, 9, 0))); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (11, -24, 25))); +} + +/* Test the 3-operand form of multiple_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_multiple_p_with_result () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test multiple_p (T, C) -> T. */ + T multiple; + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), 5, &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (3)); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), 5, &multiple)); + ASSERT_FALSE (multiple_p (ph::make (14, 5, 5), 5, &multiple)); + ASSERT_TRUE (multiple_p (ph::make (44, 0, 55), 11, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (4, 0, 5)); + ASSERT_TRUE (multiple_p (ph::make (30, 30, 0), 6, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (5, 5, 0)); + ASSERT_TRUE (multiple_p (ph::make (30, 35, 45), 5, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (6, 7, 9)); + ASSERT_EQ (multiple_p (ph::make (30, 35, 44), 5, &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ph::make (6, 7, 0)); + ASSERT_EQ (multiple_p (ph::make (30, 34, 45), 5, &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (6)); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), 5, &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (0)); + + /* Test multiple_p (C, T) -> T. */ + ASSERT_TRUE (multiple_p (15, ph::make (5, 0, 0), &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (3)); + ASSERT_FALSE (multiple_p (16, ph::make (5, 0, 0), &multiple)); + ASSERT_FALSE (multiple_p (14, ph::make (5, 5, 5), &multiple)); + ASSERT_EQ (multiple_p (44, ph::make (11, 0, 4), &multiple), N <= 2); + ASSERT_MUST_EQ (multiple, ch::make (N <= 2 ? 4 : 3)); + ASSERT_EQ (multiple_p (30, ph::make (6, 6, 6), &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? 5 : N == 2 ? 
4 : 3)); + ASSERT_TRUE (multiple_p (0, ph::make (5, 4, 11), &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (0)); + + /* Test multiple_p (T, T) -> T. */ + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), + ph::make (5, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (3)); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), + ph::make (5, 0, 0), + &multiple)); + ASSERT_FALSE (multiple_p (ph::make (14, 5, 5), + ph::make (5, 0, 0), + &multiple)); + ASSERT_TRUE (multiple_p (ph::make (44, 0, 55), + ph::make (11, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (4, 0, 5)); + ASSERT_TRUE (multiple_p (ph::make (30, 30, 0), + ph::make (6, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (5, 5, 0)); + ASSERT_TRUE (multiple_p (ph::make (30, 35, 45), + ph::make (5, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (6, 7, 9)); + ASSERT_EQ (multiple_p (ph::make (30, 35, 44), + ph::make (5, 0, 0), + &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ph::make (6, 7, 0)); + ASSERT_EQ (multiple_p (ph::make (30, 34, 45), + ph::make (5, 0, 0), + &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (6)); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (5, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (0)); + ASSERT_TRUE (multiple_p (ph::make (15, 0, 0), + ph::make (5, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (3)); + ASSERT_FALSE (multiple_p (ph::make (16, 0, 0), + ph::make (5, 0, 0), + &multiple)); + ASSERT_FALSE (multiple_p (ph::make (14, 0, 0), + ph::make (5, 5, 5), + &multiple)); + ASSERT_EQ (multiple_p (ph::make (44, 0, 0), + ph::make (11, 0, 4), + &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ch::make (4)); + ASSERT_EQ (multiple_p (ph::make (30, 0, 0), + ph::make (6, 6, 6), + &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (5)); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (5, 4, 11), + &multiple)); + ASSERT_MUST_EQ 
(multiple, ch::make (0)); + ASSERT_TRUE (multiple_p (ph::make (5, 15, 25), + ph::make (1, 3, 5), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (5)); + ASSERT_EQ (multiple_p (ph::make (18, 30, 7), + ph::make (6, 10, 2), + &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ch::make (3)); + ASSERT_EQ (multiple_p (ph::make (54, 19, 0), + ph::make (9, 3, 0), + &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (6)); + ASSERT_TRUE (multiple_p (ph::make (120, 0, 90), + ph::make (12, 0, 9), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (10)); + ASSERT_EQ (multiple_p (ph::make (110, 1, 22), + ph::make (10, 0, 2), + &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? 11 : 10)); + ASSERT_EQ (multiple_p (ph::make (120, -1, 22), + ph::make (10, 0, 2), + &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? 12 : 10)); + ASSERT_EQ (multiple_p (ph::make (130, 0, 26), + ph::make (10, 1, 2), + &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? 13 : 10)); + ASSERT_EQ (multiple_p (ph::make (140, 0, 28), + ph::make (10, -1, 2), + &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? 14 : 10)); + ASSERT_FALSE (multiple_p (ph::make (89, 0, 0), + ph::make (11, 0, 0), + &multiple)); + ASSERT_TRUE (multiple_p (ph::make (88, 0, 0), + ph::make (11, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (8)); + ASSERT_FALSE (multiple_p (ph::make (87, 0, 0), + ph::make (11, 0, 0), + &multiple)); + ASSERT_TRUE (multiple_p (ph::make (35, 63, 0), + ph::make (5, 9, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (7)); + ASSERT_TRUE (multiple_p (ph::make (0, 0, 0), + ph::make (11, -24, 25), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (0)); +} + +/* Test exact_div for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_exact_div () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test exact_div (T, C). 
*/ + ASSERT_MUST_EQ (exact_div (ph::make (15, 0, 0), 5), + ch::make (3)); + ASSERT_MUST_EQ (exact_div (ph::make (44, 0, 55), 11), + ph::make (4, 0, 5)); + ASSERT_MUST_EQ (exact_div (ph::make (30, 30, 0), 6), + ph::make (5, 5, 0)); + ASSERT_MUST_EQ (exact_div (ph::make (30, 35, 45), 5), + ph::make (6, 7, 9)); + ASSERT_MUST_EQ (exact_div (ph::make (0, 0, 0), 5), + ch::make (0)); + + /* Test exact_div (T, T). */ + ASSERT_MUST_EQ (exact_div (ph::make (15, 0, 0), + ph::make (5, 0, 0)), + ch::make (3)); + ASSERT_MUST_EQ (exact_div (ph::make (44, 0, 55), + ph::make (11, 0, 0)), + ph::make (4, 0, 5)); + ASSERT_MUST_EQ (exact_div (ph::make (30, 30, 0), + ph::make (6, 0, 0)), + ph::make (5, 5, 0)); + ASSERT_MUST_EQ (exact_div (ph::make (30, 35, 45), + ph::make (5, 0, 0)), + ph::make (6, 7, 9)); + ASSERT_MUST_EQ (exact_div (ph::make (0, 0, 0), + ph::make (5, 0, 0)), + ch::make (0)); + ASSERT_MUST_EQ (exact_div (ph::make (15, 0, 0), + ph::make (5, 0, 0)), + ch::make (3)); + ASSERT_MUST_EQ (exact_div (ph::make (0, 0, 0), + ph::make (5, 4, 11)), + ch::make (0)); + ASSERT_MUST_EQ (exact_div (ph::make (5, 15, 25), + ph::make (1, 3, 5)), + ch::make (5)); + ASSERT_MUST_EQ (exact_div (ph::make (120, 0, 90), + ph::make (12, 0, 9)), + ch::make (10)); + ASSERT_MUST_EQ (exact_div (ph::make (88, 0, 0), + ph::make (11, 0, 0)), + ch::make (8)); + ASSERT_MUST_EQ (exact_div (ph::make (35, 63, 0), + ph::make (5, 9, 0)), + ch::make (7)); + ASSERT_MUST_EQ (exact_div (ph::make (0, 0, 0), + ph::make (11, -24, 25)), + ch::make (0)); +} + +/* Test the form of can_div_trunc_p that returns a constant quotient, + for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_can_div_trunc_p_const () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test can_div_trunc_p (T, C) -> C. 
*/ + C const_quot; + ASSERT_TRUE (can_div_trunc_p (ph::make (22, 0, 0), 5, &const_quot)); + ASSERT_MUST_EQ (const_quot, C (4)); + ASSERT_EQ (can_div_trunc_p (ph::make (44, 0, 1), 5, &const_quot), N <= 2); + ASSERT_MUST_EQ (const_quot, C (N <= 2 ? 8 : 4)); + ASSERT_EQ (can_div_trunc_p (ph::make (88, 1, 0), 5, &const_quot), N == 1); + ASSERT_MUST_EQ (const_quot, C (N == 1 ? 17 : N == 2 ? 8 : 4)); + ASSERT_TRUE (can_div_trunc_p (ph::make (20, 0, 0), 5, &const_quot)); + ASSERT_MUST_EQ (const_quot, C (4)); + ASSERT_TRUE (can_div_trunc_p (ph::make (19, 0, 0), 5, &const_quot)); + ASSERT_MUST_EQ (const_quot, C (3)); + + /* Test can_div_trunc_p (T, T) -> C. */ + ASSERT_TRUE (can_div_trunc_p (ph::make (8, 44, 28), + ph::make (2, 11, 7), + &const_quot)); + ASSERT_EQ (const_quot, C (4)); + ASSERT_TRUE (can_div_trunc_p (ph::make (9, 23, 30), + ph::make (4, 8, 12), + &const_quot)); + ASSERT_EQ (const_quot, C (2)); + ASSERT_EQ (can_div_trunc_p (ph::make (15, 25, 40), + ph::make (4, 8, 10), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 3 : 2)); + ASSERT_EQ (can_div_trunc_p (ph::make (43, 79, 80), + ph::make (4, 8, 10), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 10 : N == 2 ? 
3 : 2)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 4, 5), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 4, 6), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 5, 5), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_EQ (can_div_trunc_p (ph::make (3, 4, 7), + ph::make (4, 5, 6), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, C (0)); + ASSERT_EQ (can_div_trunc_p (ph::make (3, 6, 0), + ph::make (4, 5, 6), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (0)); + ASSERT_TRUE (can_div_trunc_p (ph::make (56, 0, 11), + ph::make (11, 0, 2), + &const_quot)); + ASSERT_EQ (const_quot, C (5)); + ASSERT_EQ (can_div_trunc_p (ph::make (66, 1, 12), + ph::make (11, 0, 2), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 6 : 5)); + ASSERT_EQ (can_div_trunc_p (ph::make (77, -1, 14), + ph::make (11, 0, 2), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 7 : 5)); + ASSERT_TRUE (can_div_trunc_p (ph::make (89, 0, 0), + ph::make (11, 0, 0), + &const_quot)); + ASSERT_EQ (const_quot, C (8)); + ASSERT_EQ (can_div_trunc_p (ph::make (101, 0, 1), + ph::make (11, 0, 0), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 9 : 8)); + ASSERT_TRUE (can_div_trunc_p (ph::make (0, 0, 0), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (0)); + + /* Test can_div_trunc_p (T, T) -> C, T. 
*/ + T rem; + ASSERT_TRUE (can_div_trunc_p (ph::make (8, 44, 28), + ph::make (2, 11, 7), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (4)); + ASSERT_MUST_EQ (rem, ch::make (0)); + ASSERT_TRUE (can_div_trunc_p (ph::make (9, 23, 30), + ph::make (4, 8, 12), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (2)); + ASSERT_MUST_EQ (rem, ph::make (1, 7, 6)); + ASSERT_EQ (can_div_trunc_p (ph::make (15, 25, 40), + ph::make (4, 8, 10), + &const_quot, &rem), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 3 : 2)); + if (N <= 2) + ASSERT_MUST_EQ (rem, ph::make (3, 1, 0)); + ASSERT_EQ (can_div_trunc_p (ph::make (43, 79, 80), + ph::make (4, 8, 10), + &const_quot, &rem), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 10 : N == 2 ? 3 : 2)); + if (N == 1) + ASSERT_MUST_EQ (rem, ch::make (3)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 4, 5), + ph::make (4, 5, 6), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_MUST_EQ (rem, ph::make (3, 4, 5)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 4, 6), + ph::make (4, 5, 6), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_MUST_EQ (rem, ph::make (3, 4, 6)); + ASSERT_TRUE (can_div_trunc_p (ph::make (3, 5, 5), + ph::make (4, 5, 6), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_MUST_EQ (rem, ph::make (3, 5, 5)); + ASSERT_TRUE (can_div_trunc_p (ph::make (56, 0, 11), + ph::make (11, 0, 2), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (5)); + ASSERT_MUST_EQ (rem, ph::make (1, 0, 1)); + ASSERT_EQ (can_div_trunc_p (ph::make (66, 1, 12), + ph::make (11, 0, 2), + &const_quot, &rem), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 6 : 5)); + if (N == 1) + ASSERT_MUST_EQ (rem, ch::make (0)); + ASSERT_EQ (can_div_trunc_p (ph::make (77, -1, 14), + ph::make (11, 0, 2), + &const_quot, &rem), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 
7 : 5)); + if (N == 1) + ASSERT_MUST_EQ (rem, ch::make (0)); + ASSERT_TRUE (can_div_trunc_p (ph::make (89, 0, 0), + ph::make (11, 0, 0), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (8)); + ASSERT_MUST_EQ (rem, ch::make (1)); + ASSERT_EQ (can_div_trunc_p (ph::make (101, 0, 1), + ph::make (11, 0, 0), + &const_quot, &rem), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 9 : 8)); + if (N <= 2) + ASSERT_MUST_EQ (rem, ch::make (2)); + ASSERT_TRUE (can_div_trunc_p (ph::make (0, 0, 0), + ph::make (4, 5, 6), + &const_quot, &rem)); + ASSERT_EQ (const_quot, C (0)); + ASSERT_MUST_EQ (rem, ch::make (0)); +} + +/* Test the form of can_div_trunc_p that returns a polynomail quotient, + for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_can_div_trunc_p_poly () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test can_div_trunc_p (T, C) -> T. */ + T quot; + ASSERT_TRUE (can_div_trunc_p (ph::make (22, 0, 0), 5, ")); + ASSERT_MUST_EQ (quot, ch::make (4)); + ASSERT_TRUE (can_div_trunc_p (ph::make (45, 40, 24), 4, ")); + ASSERT_MUST_EQ (quot, ph::make (11, 10, 6)); + ASSERT_EQ (can_div_trunc_p (ph::make (13, 18, 19), 6, "), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (quot, ph::make (2, 3, 0)); + ASSERT_EQ (can_div_trunc_p (ph::make (55, 11, 10), 10, "), N == 1); + if (N == 1) + ASSERT_MUST_EQ (quot, ch::make (5)); + + /* Test can_div_trunc_p (T, C) -> T, C. 
*/ + C const_rem; + ASSERT_TRUE (can_div_trunc_p (ph::make (22, 0, 0), 5, + &quot, &const_rem)); + ASSERT_MUST_EQ (quot, ch::make (4)); + ASSERT_EQ (const_rem, C (2)); + ASSERT_TRUE (can_div_trunc_p (ph::make (45, 40, 24), 4, + &quot, &const_rem)); + ASSERT_MUST_EQ (quot, ph::make (11, 10, 6)); + ASSERT_EQ (const_rem, C (1)); + ASSERT_EQ (can_div_trunc_p (ph::make (13, 18, 19), 6, + &quot, &const_rem), N <= 2); + if (N <= 2) + { + ASSERT_MUST_EQ (quot, ph::make (2, 3, 0)); + ASSERT_EQ (const_rem, C (1)); + } + ASSERT_EQ (can_div_trunc_p (ph::make (55, 11, 10), 10, + &quot, &const_rem), N == 1); + if (N == 1) + { + ASSERT_MUST_EQ (quot, ch::make (5)); + ASSERT_EQ (const_rem, C (5)); + } +} + +/* Test can_div_away_from_zero_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_can_div_away_from_zero_p () +{ + typedef poly_helper<T> ph; + + /* Test can_div_away_from_zero_p (T, T) -> C. */ + C const_quot; + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (8, 44, 28), + ph::make (2, 11, 7), + &const_quot)); + ASSERT_EQ (const_quot, C (4)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (9, 23, 30), + ph::make (4, 8, 12), + &const_quot)); + ASSERT_EQ (const_quot, C (3)); + ASSERT_EQ (can_div_away_from_zero_p (ph::make (15, 25, 40), + ph::make (4, 8, 10), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 4 : 3)); + ASSERT_EQ (can_div_away_from_zero_p (ph::make (43, 79, 80), + ph::make (4, 8, 10), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 11 : N == 2 ? 
4 : 3)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (3, 4, 5), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (1)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (3, 4, 6), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (1)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (3, 5, 5), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (1)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (56, 0, 11), + ph::make (11, 0, 2), + &const_quot)); + ASSERT_EQ (const_quot, C (6)); + ASSERT_EQ (can_div_away_from_zero_p (ph::make (66, 1, 12), + ph::make (11, 0, 2), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (6)); + ASSERT_EQ (can_div_away_from_zero_p (ph::make (77, -1, 14), + ph::make (11, 0, 2), + &const_quot), N == 1); + ASSERT_EQ (const_quot, C (N == 1 ? 7 : 6)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (89, 0, 0), + ph::make (11, 0, 0), + &const_quot)); + ASSERT_EQ (const_quot, C (9)); + ASSERT_EQ (can_div_away_from_zero_p (ph::make (101, 0, 1), + ph::make (11, 0, 0), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, C (N <= 2 ? 10 : 9)); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (0, 0, 0), + ph::make (4, 5, 6), + &const_quot)); + ASSERT_EQ (const_quot, C (0)); +} + +/* Test known_size_p. */ + +template<unsigned int N, typename C, typename T> +static void +test_known_size_p () +{ + typedef poly_helper<T> ph; + + ASSERT_EQ (known_size_p (ph::make (-1, 0, -1)), N == 3); + ASSERT_EQ (known_size_p (ph::make (-1, -1, 0)), N >= 2); + ASSERT_EQ (known_size_p (ph::make (-1, -1, -1)), N >= 2); + ASSERT_FALSE (known_size_p (ph::make (-1, 0, 0))); + ASSERT_TRUE (known_size_p (ph::make (0, 0, 0))); + ASSERT_TRUE (known_size_p (ph::make (1, 0, 0))); +} + +/* Test maybe_in_range_p for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_maybe_in_range_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_FALSE (maybe_in_range_p (ch::make (4), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_FALSE (maybe_in_range_p (ph::make (4, 0, 0), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_FALSE (maybe_in_range_p (ph::make (4, 1, 2), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_TRUE (maybe_in_range_p (ph::make (5, 1, 2), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_EQ (maybe_in_range_p (ph::make (4, 0, 3), + ph::make (5, 1, 2), + ch::make (-1)), N == 3); + ASSERT_EQ (maybe_in_range_p (ph::make (4, 2, 0), + ph::make (5, 1, 2), + ch::make (-1)), N >= 2); + ASSERT_TRUE (maybe_in_range_p (ph::make (500, 100, 200), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_EQ (maybe_in_range_p (ph::make (6, 1, 0), + ph::make (5, 1, 1), + ch::make (1)), N == 3); + ASSERT_EQ (maybe_in_range_p (ph::make (6, 0, 1), + ph::make (5, 1, 1), + ch::make (1)), N >= 2); + ASSERT_FALSE (maybe_in_range_p (ph::make (14, 1, 2), + ph::make (5, 1, 2), + ch::make (9))); + ASSERT_FALSE (maybe_in_range_p (ph::make (14, 1, 2), + ch::make (5), + ph::make (9, 1, 2))); + ASSERT_FALSE (maybe_in_range_p (ph::make (15, 15, 17), + ph::make (8, 10, 11), + ph::make (7, 5, 6))); + ASSERT_EQ (maybe_in_range_p (ph::make (15, 15, 16), + ph::make (8, 10, 11), + ph::make (7, 5, 6)), N == 3); + ASSERT_EQ (maybe_in_range_p (ph::make (15, 14, 17), + ph::make (8, 10, 11), + ph::make (7, 5, 6)), N >= 2); + ASSERT_TRUE (maybe_in_range_p (ph::make (6, 100, 1000), + ph::make (5, 10, 11), + ph::make (2, 1, 2))); + ASSERT_FALSE (maybe_in_range_p (ph::make (6, 8, 2), + ph::make (6, 8, 2), + ch::make (0))); + ASSERT_EQ (maybe_in_range_p (ph::make (6, 8, 1), + ph::make (6, 7, 2), + ph::make (0, 1, 2)), N == 3); +} + +/* Test known_in_range_p for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_known_in_range_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_FALSE (known_in_range_p (ch::make (4), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_FALSE (known_in_range_p (ph::make (5, 1, 2), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_FALSE (known_in_range_p (ph::make (6, 2, 3), + ph::make (5, 1, 2), + ch::make (-1))); + ASSERT_FALSE (known_in_range_p (ph::make (6, 1, 0), + ph::make (5, 1, 1), + ch::make (1))); + ASSERT_FALSE (known_in_range_p (ph::make (6, 0, 1), + ph::make (5, 1, 1), + ch::make (1))); + ASSERT_EQ (known_in_range_p (ph::make (6, 1, 0), + ph::make (5, 1, 1), + ch::make (2)), N <= 2); + ASSERT_EQ (known_in_range_p (ph::make (6, 0, 1), + ph::make (5, 1, 1), + ch::make (2)), N == 1); + ASSERT_TRUE (known_in_range_p (ph::make (6, 4, 5), + ph::make (5, 1, 2), + ph::make (2, 3, 3))); + ASSERT_EQ (known_in_range_p (ph::make (6, 4, 6), + ph::make (5, 1, 2), + ph::make (2, 3, 3)), N <= 2); + ASSERT_EQ (known_in_range_p (ph::make (6, 5, 5), + ph::make (5, 1, 2), + ph::make (2, 3, 3)), N == 1); + ASSERT_FALSE (known_in_range_p (ph::make (6, 8, 2), + ph::make (6, 8, 2), + ch::make (0))); + ASSERT_FALSE (known_in_range_p (ph::make (6, 8, 1), + ph::make (6, 7, 2), + ph::make (0, 1, 2))); +} + +/* Test ranges_may_overlap_p for both signed and unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_ranges_may_overlap_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_TRUE (ranges_may_overlap_p (ph::make (4, 1, 2), + ch::make (-1), + ph::make (500, 3, 5), + ch::make (1))); + ASSERT_FALSE (ranges_may_overlap_p (ph::make (100, 1, 5), + ch::make (-1), + ph::make (50, 1, 5), + ch::make (50))); + ASSERT_EQ (ranges_may_overlap_p (ph::make (100, 1, 5), + ch::make (-1), + ph::make (50, 0, 6), + ch::make (50)), N == 3); + ASSERT_EQ (ranges_may_overlap_p (ph::make (100, 1, 5), + ch::make (-1), + ph::make (50, 2, 0), + ch::make (50)), N >= 2); + ASSERT_TRUE (ranges_may_overlap_p (ph::make (500, 3, 5), + ch::make (1), + ph::make (4, 1, 2), + ch::make (-1))); + ASSERT_FALSE (ranges_may_overlap_p (ph::make (50, 1, 5), + ch::make (50), + ph::make (100, 1, 5), + ch::make (-1))); + ASSERT_FALSE (ranges_may_overlap_p (ph::make (10, 2, 3), + ch::make (0), + ch::make (0), + ph::make (20, 30, 40))); + ASSERT_EQ (ranges_may_overlap_p (ph::make (10, 2, 3), + ph::make (0, 1, 1), + ph::make (0, 1, 1), + ph::make (20, 30, 40)), N >= 2); + ASSERT_FALSE (ranges_may_overlap_p (ch::make (0), + ph::make (20, 30, 40), + ph::make (10, 2, 3), + ch::make (0))); + ASSERT_EQ (ranges_may_overlap_p (ph::make (0, 1, 1), + ph::make (20, 30, 40), + ph::make (10, 2, 3), + ph::make (0, 1, 0)), N >= 2); + ASSERT_TRUE (ranges_may_overlap_p (ph::make (8, 10, 15), + ph::make (2, 6, 20), + ch::make (7), + ch::make (2))); + ASSERT_FALSE (ranges_may_overlap_p (ph::make (8, 10, 15), + ph::make (2, 6, 20), + ch::make (6), + ph::make (2, 10, 15))); + ASSERT_EQ (ranges_may_overlap_p (ph::make (8, 10, 15), + ph::make (2, 6, 20), + ch::make (6), + ph::make (0, 0, 16)), N == 3); + ASSERT_EQ (ranges_may_overlap_p (ph::make (8, 10, 15), + ph::make (2, 6, 20), + ch::make (6), + ph::make (0, 11, 0)), N >= 2); + ASSERT_FALSE (ranges_may_overlap_p (ph::make (80, 4, 5), + ph::make (10, 6, 7), + ph::make (100, 10, 12), + 
ph::make (20, 1, 2))); + ASSERT_EQ (ranges_may_overlap_p (ph::make (80, 4, 5), + ph::make (10, 6, 7), + ph::make (100, 10, 11), + ph::make (0, 0, 2)), N == 3); + ASSERT_EQ (ranges_may_overlap_p (ph::make (80, 5, 5), + ph::make (0, 6, 0), + ph::make (100, 10, 12), + ph::make (20, 1, 2)), N >= 2); +} + +/* Test ranges_must_overlap_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_ranges_must_overlap_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_FALSE (ranges_must_overlap_p (ph::make (5, 1, 2), + ch::make (-1), + ch::make (4), + ch::make (2))); + ASSERT_FALSE (ranges_must_overlap_p (ch::make (9), + ph::make (2, 3, 4), + ch::make (10), + ch::make (-1))); + ASSERT_FALSE (ranges_must_overlap_p (ph::make (10, 2, 3), + ch::make (0), + ch::make (0), + ph::make (20, 30, 40))); + ASSERT_FALSE (ranges_must_overlap_p (ph::make (10, 2, 3), + ph::make (0, 1, 1), + ph::make (0, 1, 1), + ph::make (20, 30, 40))); + ASSERT_FALSE (ranges_must_overlap_p (ch::make (0), + ph::make (20, 30, 40), + ph::make (10, 2, 3), + ch::make (0))); + ASSERT_FALSE (ranges_must_overlap_p (ph::make (0, 1, 1), + ph::make (20, 30, 40), + ph::make (10, 2, 3), + ph::make (0, 1, 0))); + ASSERT_EQ (ranges_must_overlap_p (ph::make (5, 1, 2), + ch::make (1), + ch::make (4), + ch::make (2)), N == 1); + ASSERT_TRUE (ranges_must_overlap_p (ph::make (5, 1, 2), + ch::make (1), + ph::make (4, 1, 2), + ch::make (2))); + ASSERT_TRUE (ranges_must_overlap_p (ch::make (9), + ph::make (2, 3, 4), + ch::make (10), + ch::make (1))); + ASSERT_FALSE (ranges_must_overlap_p (ph::make (10, 11, 12), + ph::make (20, 30, 40), + ch::make (30), + ch::make (1))); + ASSERT_TRUE (ranges_must_overlap_p (ph::make (10, 11, 12), + ph::make (20, 30, 40), + ph::make (29, 41, 52), + ch::make (1))); + ASSERT_EQ (ranges_must_overlap_p (ph::make (10, 11, 12), + ph::make (20, 30, 40), + ph::make (29, 41, 53), + ch::make (1)), N <= 2); + ASSERT_EQ 
(ranges_must_overlap_p (ph::make (10, 11, 12), + ph::make (20, 30, 40), + ph::make (29, 42, 52), + ch::make (1)), N == 1); + ASSERT_TRUE (ranges_must_overlap_p (ph::make (29, 41, 52), + ch::make (1), + ph::make (10, 11, 12), + ph::make (20, 30, 40))); + ASSERT_EQ (ranges_must_overlap_p (ph::make (29, 41, 53), + ch::make (1), + ph::make (10, 11, 12), + ph::make (20, 30, 40)), N <= 2); + ASSERT_EQ (ranges_must_overlap_p (ph::make (29, 42, 52), + ch::make (1), + ph::make (10, 11, 12), + ph::make (20, 30, 40)), N == 1); + ASSERT_TRUE (ranges_must_overlap_p (ph::make (10, 0, 20), + ph::make (4, 4, 4), + ph::make (7, 3, 20), + ph::make (4, 4, 4))); +} + +/* Test known_subrange_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_known_subrange_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_FALSE (known_subrange_p (ph::make (5, 1, 2), + ch::make (-1), + ch::make (4), + ph::make (2, 2, 2))); + ASSERT_FALSE (known_subrange_p (ph::make (5, 2, 3), + ch::make (2), + ph::make (4, 1, 2), + ch::make (-1))); + ASSERT_FALSE (known_subrange_p (ph::make (6, 2, 3), + ph::make (0, 1, 1), + ch::make (4), + ph::make (3, 4, 11))); + ASSERT_TRUE (known_subrange_p (ph::make (6, 2, 3), + ph::make (1, 1, 1), + ch::make (4), + ph::make (3, 4, 11))); + ASSERT_FALSE (known_subrange_p (ph::make (6, 2, 3), + ph::make (1, 1, 1), + ch::make (4), + ph::make (2, 4, 11))); + ASSERT_TRUE (known_subrange_p (ph::make (10, 20, 30), + ph::make (5, 6, 7), + ph::make (9, 19, 29), + ph::make (6, 7, 8))); + ASSERT_EQ (known_subrange_p (ph::make (10, 20, 31), + ph::make (5, 6, 7), + ph::make (9, 19, 29), + ph::make (6, 7, 8)), N <= 2); + ASSERT_EQ (known_subrange_p (ph::make (10, 20, 30), + ph::make (5, 7, 7), + ph::make (9, 19, 29), + ph::make (6, 7, 8)), N == 1); + ASSERT_EQ (known_subrange_p (ph::make (10, 20, 30), + ph::make (5, 6, 7), + ph::make (9, 18, 29), + ph::make (6, 7, 8)), N == 1); + ASSERT_EQ (known_subrange_p 
(ph::make (10, 20, 30), + ph::make (5, 6, 7), + ph::make (9, 19, 29), + ph::make (6, 6, 8)), N == 1); +} + +/* Test coeffs_in_range_p for both signed and unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_coeffs_in_range_p (void) +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_TRUE (coeffs_in_range_p (ph::make (10, 20, 30), 10, 30)); + ASSERT_EQ (coeffs_in_range_p (ph::make (1, 10, 19), 0, 11), N <= 2); + ASSERT_EQ (coeffs_in_range_p (ph::make (100, 1, 102), 10, 100), N == 1); + ASSERT_FALSE (coeffs_in_range_p (ph::make (10, 11, 12), 7, 9)); + ASSERT_FALSE (coeffs_in_range_p (ph::make (10, 11, 12), 13, 15)); +} + +/* Test may_eq for poly_int<2, C>, given that C is signed. */ + +template<typename C> +static void +test_signed_may_eq_2 () +{ + typedef poly_int<2, C> T; + + /* Test may_eq (T, C). */ + ASSERT_TRUE (may_eq (T (4, -4), 0)); + ASSERT_FALSE (may_eq (T (4, -4), 1)); + ASSERT_TRUE (may_eq (T (4, -4), 4)); + ASSERT_FALSE (may_eq (T (4, -4), 8)); + ASSERT_TRUE (may_eq (T (4, -4), -4)); + ASSERT_FALSE (may_eq (T (4, -4), -3)); + + /* Test may_eq (C, T). */ + ASSERT_FALSE (may_eq (0, T (4, -3))); + ASSERT_TRUE (may_eq (1, T (4, -3))); + ASSERT_TRUE (may_eq (4, T (4, -3))); + ASSERT_FALSE (may_eq (7, T (4, -3))); + ASSERT_FALSE (may_eq (T (4, -3), -3)); + ASSERT_TRUE (may_eq (T (4, -3), -2)); + + /* Test may_eq (T, T). */ + ASSERT_TRUE (may_eq (T (0, 3), T (6, 1))); + ASSERT_FALSE (may_eq (T (0, -3), T (6, 1))); + ASSERT_FALSE (may_eq (T (0, 3), T (7, 1))); + ASSERT_TRUE (may_eq (T (-3, 4), T (7, -1))); + ASSERT_FALSE (may_eq (T (-3, 4), T (6, -1))); +} + +/* Test must_ne for poly_int<2, C>, given that C is signed. */ + +template<typename C> +static void +test_signed_must_ne_2 () +{ + typedef poly_int<2, C> T; + + /* Test must_ne (T, C). 
*/ + ASSERT_FALSE (must_ne (T (4, -4), 0)); + ASSERT_TRUE (must_ne (T (4, -4), 1)); + ASSERT_FALSE (must_ne (T (4, -4), 4)); + ASSERT_TRUE (must_ne (T (4, -4), 8)); + ASSERT_FALSE (must_ne (T (4, -4), -4)); + ASSERT_TRUE (must_ne (T (4, -4), -3)); + + /* Test must_ne (C, T). */ + ASSERT_TRUE (must_ne (0, T (4, -3))); + ASSERT_FALSE (must_ne (1, T (4, -3))); + ASSERT_FALSE (must_ne (4, T (4, -3))); + ASSERT_TRUE (must_ne (7, T (4, -3))); + ASSERT_TRUE (must_ne (T (4, -3), -3)); + ASSERT_FALSE (must_ne (T (4, -3), -2)); + + /* Test must_ne (T, T). */ + ASSERT_FALSE (must_ne (T (0, 3), T (6, 1))); + ASSERT_TRUE (must_ne (T (0, -3), T (6, 1))); + ASSERT_TRUE (must_ne (T (0, 3), T (7, 1))); + ASSERT_FALSE (must_ne (T (-3, 4), T (7, -1))); + ASSERT_TRUE (must_ne (T (-3, 4), T (6, -1))); +} + +/* Test negation for signed C, both via operators and wi::. */ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_signed_negation () +{ + typedef poly_helper<T> ph; + typedef poly_helper< poly_int<N, RC> > rph; + typedef poly_helper< poly_int<N, int> > iph; + + /* Test unary -. */ + ASSERT_MUST_EQ (-ph::make (-11, 22, -33), + rph::make (11, -22, 33)); + + /* Test wi::neg. */ + ASSERT_MUST_EQ (wi::neg (ph::make (-11, 22, -33)), + iph::make (11, -22, 33)); +} + +/* Test may_le for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_may_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_le (T, C). */ + ASSERT_EQ (may_le (ph::make (3, 5, -1), ch::make (2)), N == 3); + ASSERT_EQ (may_le (ph::make (40, -10, 60), ch::make (15)), N >= 2); + ASSERT_TRUE (may_le (ph::make (-14, 0, 0), ch::make (13))); + + /* Test may_le (C, T). */ + ASSERT_EQ (may_le (ch::make (4), ph::make (3, 5, -1)), N >= 2); + ASSERT_EQ (may_le (ch::make (41), ph::make (40, -10, 60)), N == 3); + ASSERT_TRUE (may_le (ch::make (-15), ph::make (11, 0, 0))); + + /* Test may_le (T, T). 
*/ + ASSERT_EQ (may_le (ph::make (-2, 4, -2), + ph::make (-3, -5, -1)), N == 3); + ASSERT_EQ (may_le (ph::make (-2, -6, 0), + ph::make (-3, 0, 100)), N >= 2); + ASSERT_FALSE (may_le (ph::make (-2, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test may_lt for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_may_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_lt (T, C). */ + ASSERT_EQ (may_lt (ph::make (3, 5, -1), ch::make (2)), N == 3); + ASSERT_EQ (may_lt (ph::make (40, -10, 60), ch::make (15)), N >= 2); + ASSERT_TRUE (may_lt (ph::make (-18, 0, 0), ch::make (18))); + ASSERT_EQ (may_lt (ph::make (-2, -2, -2), ch::make (-2)), N >= 2); + + /* Test may_lt (C, T). */ + ASSERT_EQ (may_lt (ch::make (4), ph::make (3, 5, -1)), N >= 2); + ASSERT_EQ (may_lt (ch::make (41), ph::make (40, -10, 60)), N == 3); + ASSERT_TRUE (may_lt (ch::make (-45), ph::make (40, 0, 0))); + ASSERT_FALSE (may_lt (ch::make (-2), ph::make (-2, -2, -2))); + + /* Test may_lt (T, T). */ + ASSERT_EQ (may_lt (ph::make (-3, 4, -2), + ph::make (-3, -5, -1)), N == 3); + ASSERT_EQ (may_lt (ph::make (-3, -6, 0), + ph::make (-3, 0, 100)), N >= 2); + ASSERT_FALSE (may_lt (ph::make (-3, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test may_ge for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_may_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_ge (T, C). */ + ASSERT_EQ (may_ge (ph::make (3, 5, -1), ch::make (4)), N >= 2); + ASSERT_EQ (may_ge (ph::make (40, -10, 60), ch::make (41)), N == 3); + ASSERT_TRUE (may_ge (ph::make (11, 0, 0), ch::make (-15))); + + /* Test may_ge (C, T). */ + ASSERT_EQ (may_ge (ch::make (2), ph::make (3, 5, -1)), N == 3); + ASSERT_EQ (may_ge (ch::make (15), ph::make (40, -10, 60)), N >= 2); + ASSERT_TRUE (may_ge (ch::make (13), ph::make (-14, 0, 0))); + + /* Test may_ge (T, T). 
*/ + ASSERT_EQ (may_ge (ph::make (-3, -5, -1), + ph::make (-2, 4, -2)), N == 3); + ASSERT_EQ (may_ge (ph::make (-3, 0, 100), + ph::make (-2, -6, 0)), N >= 2); + ASSERT_FALSE (may_ge (ph::make (-3, 4, 0), + ph::make (-2, 5, 1))); +} + +/* Test may_gt for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_may_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_gt (T, C). */ + ASSERT_EQ (may_gt (ph::make (3, 5, -1), ch::make (4)), N >= 2); + ASSERT_EQ (may_gt (ph::make (40, -10, 60), ch::make (41)), N == 3); + ASSERT_TRUE (may_gt (ph::make (40, 0, 0), ch::make (-45))); + ASSERT_FALSE (may_gt (ph::make (-2, -2, -2), ch::make (-2))); + + /* Test may_gt (C, T). */ + ASSERT_EQ (may_gt (ch::make (2), ph::make (3, 5, -1)), N == 3); + ASSERT_EQ (may_gt (ch::make (15), ph::make (40, -10, 60)), N >= 2); + ASSERT_TRUE (may_gt (ch::make (18), ph::make (-18, 0, 0))); + ASSERT_EQ (may_gt (ch::make (-2), ph::make (-2, -2, -2)), N >= 2); + + /* Test may_gt (T, T). */ + ASSERT_EQ (may_gt (ph::make (-3, -5, -1), + ph::make (-3, 4, -2)), N == 3); + ASSERT_EQ (may_gt (ph::make (-3, 0, 100), + ph::make (-3, -6, 0)), N >= 2); + ASSERT_FALSE (may_gt (ph::make (-3, 4, 0), + ph::make (-3, 5, 1))); +} + +/* Test must_gt for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_must_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_gt (T, C). */ + ASSERT_EQ (must_gt (ph::make (3, 5, -1), ch::make (2)), N <= 2); + ASSERT_EQ (must_gt (ph::make (40, -10, 60), ch::make (15)), N == 1); + ASSERT_FALSE (must_gt (ph::make (-14, 0, 0), ch::make (13))); + + /* Test must_gt (C, T). */ + ASSERT_EQ (must_gt (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_EQ (must_gt (ch::make (41), ph::make (40, -10, 60)), N <= 2); + ASSERT_FALSE (must_gt (ch::make (-15), ph::make (11, 0, 0))); + + /* Test must_gt (T, T). 
*/ + ASSERT_EQ (must_gt (ph::make (-2, 4, -2), + ph::make (-3, -5, -1)), N <= 2); + ASSERT_EQ (must_gt (ph::make (-2, -6, 0), + ph::make (-3, 0, 100)), N == 1); + ASSERT_TRUE (must_gt (ph::make (-2, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test must_ge for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_must_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_ge (T, C). */ + ASSERT_EQ (must_ge (ph::make (3, 5, -1), ch::make (2)), N <= 2); + ASSERT_EQ (must_ge (ph::make (40, -10, 60), ch::make (15)), N == 1); + ASSERT_FALSE (must_ge (ph::make (-18, 0, 0), ch::make (18))); + ASSERT_EQ (must_ge (ph::make (-2, -2, -2), ch::make (-2)), N == 1); + + /* Test must_ge (C, T). */ + ASSERT_EQ (must_ge (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_EQ (must_ge (ch::make (41), ph::make (40, -10, 60)), N <= 2); + ASSERT_FALSE (must_ge (ch::make (-45), ph::make (40, 0, 0))); + ASSERT_TRUE (must_ge (ch::make (-2), ph::make (-2, -2, -2))); + + /* Test must_ge (T, T). */ + ASSERT_EQ (must_ge (ph::make (-3, 4, -2), + ph::make (-3, -5, -1)), N <= 2); + ASSERT_EQ (must_ge (ph::make (-3, -6, 0), + ph::make (-3, 0, 100)), N == 1); + ASSERT_TRUE (must_ge (ph::make (-3, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test must_lt for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_must_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_lt (T, C). */ + ASSERT_EQ (must_lt (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_EQ (must_lt (ph::make (40, -10, 60), ch::make (41)), N <= 2); + ASSERT_FALSE (must_lt (ph::make (11, 0, 0), ch::make (-15))); + + /* Test must_lt (C, T). */ + ASSERT_EQ (must_lt (ch::make (2), ph::make (3, 5, -1)), N <= 2); + ASSERT_EQ (must_lt (ch::make (15), ph::make (40, -10, 60)), N == 1); + ASSERT_FALSE (must_lt (ch::make (13), ph::make (-14, 0, 0))); + + /* Test must_lt (T, T). 
*/ + ASSERT_EQ (must_lt (ph::make (-3, -5, -1), + ph::make (-2, 4, -2)), N <= 2); + ASSERT_EQ (must_lt (ph::make (-3, 0, 100), + ph::make (-2, -6, 0)), N == 1); + ASSERT_TRUE (must_lt (ph::make (-3, 4, 0), + ph::make (-2, 5, 1))); +} + +/* Test must_le for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_must_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_le (T, C). */ + ASSERT_EQ (must_le (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_EQ (must_le (ph::make (40, -10, 60), ch::make (41)), N <= 2); + ASSERT_FALSE (must_le (ph::make (40, 0, 0), ch::make (-45))); + ASSERT_TRUE (must_le (ph::make (-2, -2, -2), ch::make (-2))); + + /* Test must_le (C, T). */ + ASSERT_EQ (must_le (ch::make (2), ph::make (3, 5, -1)), N <= 2); + ASSERT_EQ (must_le (ch::make (15), ph::make (40, -10, 60)), N == 1); + ASSERT_FALSE (must_le (ch::make (18), ph::make (-18, 0, 0))); + ASSERT_EQ (must_le (ch::make (-2), ph::make (-2, -2, -2)), N == 1); + + /* Test must_le (T, T). */ + ASSERT_EQ (must_le (ph::make (-3, -5, -1), + ph::make (-3, 4, -2)), N <= 2); + ASSERT_EQ (must_le (ph::make (-3, 0, 100), + ph::make (-3, -6, 0)), N == 1); + ASSERT_TRUE (must_le (ph::make (-3, 4, 0), + ph::make (-3, 5, 1))); +} + +/* Test ordered_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_ordered_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_p (T, C). 
*/ + ASSERT_EQ (ordered_p (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_EQ (ordered_p (ph::make (3, 5, -1), ch::make (3)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (3, 5, -1), ch::make (2)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (40, -10, 60), ch::make (41)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (40, -10, 60), ch::make (40)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (40, -10, 60), ch::make (39)), N == 1); + ASSERT_EQ (ordered_p (ph::make (4, -4, -4), ch::make (0)), N == 1); + ASSERT_EQ (ordered_p (ph::make (4, 0, -4), ch::make (0)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (4, 4, -4), ch::make (0)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (-4, 4, 4), ch::make (0)), N == 1); + ASSERT_EQ (ordered_p (ph::make (-4, 0, 4), ch::make (0)), N <= 2); + ASSERT_EQ (ordered_p (ph::make (-4, -4, 4), ch::make (0)), N <= 2); + + /* Test ordered_p (C, T). */ + ASSERT_EQ (ordered_p (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_EQ (ordered_p (ch::make (3), ph::make (3, 5, -1)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (2), ph::make (3, 5, -1)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (41), ph::make (40, -10, 60)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (40), ph::make (40, -10, 60)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (39), ph::make (40, -10, 60)), N == 1); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (4, -4, -4)), N == 1); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (4, 0, -4)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (4, 4, -4)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (-4, 4, 4)), N == 1); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (-4, 0, 4)), N <= 2); + ASSERT_EQ (ordered_p (ch::make (0), ph::make (-4, -4, 4)), N <= 2); +} + +/* Test ordered_min for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_ordered_min () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_min (T, C). 
*/ + ASSERT_MUST_EQ (ordered_min (ph::make (4, -12, -14), ch::make (5)), + ph::make (4, -12, -14)); + + /* Test ordered_min (C, T). */ + ASSERT_MUST_EQ (ordered_min (ch::make (9), ph::make (9, -90, -77)), + ph::make (9, -90, -77)); + + /* Test ordered_min (T, T). */ + ASSERT_MUST_EQ (ordered_min (ph::make (4, 9, 17), ph::make (4, -1, 17)), + ph::make (4, -1, 17)); +} + +/* Test ordered_max for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_ordered_max () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_max (T, C). */ + ASSERT_MUST_EQ (ordered_max (ph::make (4, -12, -14), ch::make (5)), + ch::make (5)); + + /* Test ordered_max (C, T). */ + ASSERT_MUST_EQ (ordered_max (ch::make (9), ph::make (9, -90, -77)), + ch::make (9)); + + /* Test ordered_max (T, T). */ + ASSERT_MUST_EQ (ordered_max (ph::make (4, 9, 17), ph::make (4, -1, 17)), + ph::make (4, 9, 17)); +} + +/* Test lower_bound for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_lower_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test lower_bound (T, C). */ + ASSERT_MUST_EQ (lower_bound (ph::make (4, -1, 3), ch::make (5)), + ph::make (4, -1, 0)); + ASSERT_MUST_EQ (lower_bound (ph::make (6, 5, -14), ch::make (-11)), + ph::make (-11, 0, -14)); + + /* Test lower_bound (C, T). */ + ASSERT_MUST_EQ (lower_bound (ch::make (5), ph::make (4, -1, 3)), + ph::make (4, -1, 0)); + ASSERT_MUST_EQ (lower_bound (ch::make (-11), ph::make (6, 5, -14)), + ph::make (-11, 0, -14)); + + /* Test lower_bound (T, T). */ + ASSERT_MUST_EQ (lower_bound (ph::make (4, -1, 3), ph::make (5, 7, -2)), + ph::make (4, -1, -2)); + ASSERT_MUST_EQ (lower_bound (ph::make (6, 5, -14), ph::make (-11, 4, 3)), + ph::make (-11, 4, -14)); +} + +/* Test upper_bound for signed C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_signed_upper_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test upper_bound (T, C). */ + ASSERT_MUST_EQ (upper_bound (ph::make (4, -1, 3), ch::make (5)), + ph::make (5, 0, 3)); + ASSERT_MUST_EQ (upper_bound (ph::make (6, 5, -14), ch::make (-11)), + ph::make (6, 5, 0)); + + /* Test upper_bound (C, T). */ + ASSERT_MUST_EQ (upper_bound (ch::make (5), ph::make (4, -1, 3)), + ph::make (5, 0, 3)); + ASSERT_MUST_EQ (upper_bound (ch::make (-11), ph::make (6, 5, -14)), + ph::make (6, 5, 0)); + + /* Test upper_bound (T, T). */ + ASSERT_MUST_EQ (upper_bound (ph::make (4, -1, 3), ph::make (5, 7, -2)), + ph::make (5, 7, 3)); + ASSERT_MUST_EQ (upper_bound (ph::make (6, 5, -14), ph::make (-11, 4, 3)), + ph::make (6, 5, 3)); +} + +/* Test constant_multiple_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_constant_multiple_p () +{ + typedef poly_helper<T> ph; + + /* Test constant_multiple_p (T, C). */ + C const_multiple; + ASSERT_TRUE (constant_multiple_p (ph::make (-45, 0, 0), 9, + &const_multiple)); + ASSERT_EQ (const_multiple, -5); + ASSERT_TRUE (constant_multiple_p (ph::make (63, 0, 0), -7, + &const_multiple)); + ASSERT_EQ (const_multiple, -9); + ASSERT_FALSE (constant_multiple_p (ph::make (-121, 0, 0), -12, + &const_multiple)); + ASSERT_TRUE (constant_multiple_p (ph::make (-120, 0, 0), -12, + &const_multiple)); + ASSERT_EQ (const_multiple, 10); + ASSERT_FALSE (constant_multiple_p (ph::make (-119, 0, 0), -12, + &const_multiple)); + ASSERT_EQ (constant_multiple_p (ph::make (-120, -23, 12), 12, + &const_multiple), N == 1); + if (N == 1) + ASSERT_EQ (const_multiple, -10); + + /* Test constant_multiple_p (C, T). 
*/ + ASSERT_TRUE (constant_multiple_p (-45, ph::make (9, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, -5); + ASSERT_TRUE (constant_multiple_p (63, ph::make (-7, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, -9); + ASSERT_FALSE (constant_multiple_p (-121, ph::make (-12, 0, 0), + &const_multiple)); + ASSERT_TRUE (constant_multiple_p (-120, ph::make (-12, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, 10); + ASSERT_FALSE (constant_multiple_p (-119, ph::make (-12, 0, 0), + &const_multiple)); + ASSERT_EQ (constant_multiple_p (-120, ph::make (12, 10, 6), + &const_multiple), N == 1); + if (N == 1) + ASSERT_EQ (const_multiple, -10); + + ASSERT_TRUE (constant_multiple_p (ph::make (-40, 80, -200), + ph::make (2, -4, 10), + &const_multiple)); + ASSERT_EQ (const_multiple, -20); + ASSERT_EQ (constant_multiple_p (ph::make (-20, 40, 100), + ph::make (2, -4, 10), + &const_multiple), N <= 2); + ASSERT_EQ (const_multiple, N <= 2 ? -10 : -20); + ASSERT_EQ (constant_multiple_p (ph::make (-10, -20, -50), + ph::make (2, -4, 10), + &const_multiple), N == 1); + ASSERT_EQ (const_multiple, N == 1 ? -5 : N == 2 ? -10 : -20); + ASSERT_FALSE (constant_multiple_p (ph::make (-31, 0, 0), + ph::make (-6, 0, 0), + &const_multiple)); + ASSERT_TRUE (constant_multiple_p (ph::make (-30, 0, 0), + ph::make (-6, 0, 0), + &const_multiple)); + ASSERT_EQ (const_multiple, 5); + ASSERT_FALSE (constant_multiple_p (ph::make (-29, 0, 0), + ph::make (-6, 0, 0), + &const_multiple)); +} + +/* Test multiple_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_multiple_p () +{ + typedef poly_helper<T> ph; + + /* Test multiple_p (T, C). 
*/ + ASSERT_TRUE (multiple_p (ph::make (-45, 36, 0), 9)); + ASSERT_TRUE (multiple_p (ph::make (63, 0, -14), -7)); + ASSERT_FALSE (multiple_p (ph::make (-121, 0, 0), -12)); + ASSERT_TRUE (multiple_p (ph::make (-120, 0, 0), -12)); + ASSERT_FALSE (multiple_p (ph::make (-119, 0, 0), -12)); + ASSERT_TRUE (multiple_p (ph::make (-120, -24, 12), 12)); + ASSERT_EQ (multiple_p (ph::make (-120, -24, 11), 12), N <= 2); + ASSERT_EQ (multiple_p (ph::make (-120, -23, 12), 12), N == 1); + + /* Test multiple_p (C, T). */ + ASSERT_TRUE (multiple_p (-45, ph::make (9, 0, 0))); + ASSERT_TRUE (multiple_p (63, ph::make (-7, 0, 0))); + ASSERT_FALSE (multiple_p (-121, ph::make (-12, 0, 0))); + ASSERT_TRUE (multiple_p (-120, ph::make (-12, 0, 0))); + ASSERT_FALSE (multiple_p (-119, ph::make (-12, 0, 0))); + ASSERT_EQ (multiple_p (-120, ph::make (12, 10, 6)), N == 1); + + /* Test multiple_p (T, T). */ + ASSERT_TRUE (multiple_p (ph::make (-40, 80, -200), + ph::make (2, -4, 10))); + ASSERT_EQ (multiple_p (ph::make (-20, 40, 100), + ph::make (2, -4, 10)), N <= 2); + ASSERT_EQ (multiple_p (ph::make (-10, -20, -50), + ph::make (2, -4, 10)), N == 1); + ASSERT_FALSE (multiple_p (ph::make (-31, 0, 0), + ph::make (-6, 0, 0))); + ASSERT_TRUE (multiple_p (ph::make (-30, 0, 0), + ph::make (-6, 0, 0))); + ASSERT_FALSE (multiple_p (ph::make (-29, 0, 0), + ph::make (-6, 0, 0))); +} + +/* Test the 3-operand form of multiple_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_multiple_p_with_result () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test multiple_p (T, C) -> T. 
*/ + T multiple; + ASSERT_TRUE (multiple_p (ph::make (-45, 36, 0), 9, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (-5, 4, 0)); + ASSERT_TRUE (multiple_p (ph::make (63, 0, -14), -7, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (-9, 0, 2)); + ASSERT_FALSE (multiple_p (ph::make (-121, 0, 0), -12, &multiple)); + ASSERT_TRUE (multiple_p (ph::make (-120, 0, 0), -12, &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (10)); + ASSERT_FALSE (multiple_p (ph::make (-119, 0, 0), -12, &multiple)); + ASSERT_TRUE (multiple_p (ph::make (-120, -24, 12), 12, &multiple)); + ASSERT_MUST_EQ (multiple, ph::make (-10, -2, 1)); + ASSERT_EQ (multiple_p (ph::make (-120, -24, 11), 12, &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ph::make (-10, -2, 0)); + ASSERT_EQ (multiple_p (ph::make (-120, -23, 12), 12, &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (-10)); + + /* Test multiple_p (C, T) -> T. */ + ASSERT_TRUE (multiple_p (-45, ph::make (9, 0, 0), &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (-5)); + ASSERT_TRUE (multiple_p (63, ph::make (-7, 0, 0), &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (-9)); + ASSERT_FALSE (multiple_p (-121, ph::make (-12, 0, 0), &multiple)); + ASSERT_TRUE (multiple_p (-120, ph::make (-12, 0, 0), &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (10)); + ASSERT_FALSE (multiple_p (-119, ph::make (-12, 0, 0), &multiple)); + ASSERT_EQ (multiple_p (-120, ph::make (12, 10, 6), &multiple), N == 1); + ASSERT_MUST_EQ (multiple, ch::make (N == 1 ? -10 : 10)); + + /* Test multiple_p (T, T) -> T. 
*/ + ASSERT_TRUE (multiple_p (ph::make (-40, 80, -200), + ph::make (2, -4, 10), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (-20)); + ASSERT_EQ (multiple_p (ph::make (-20, 40, 100), + ph::make (2, -4, 10), + &multiple), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (multiple, ch::make (-10)); + ASSERT_EQ (multiple_p (ph::make (-10, -20, -50), + ph::make (2, -4, 10), + &multiple), N == 1); + if (N == 1) + ASSERT_MUST_EQ (multiple, ch::make (-5)); + ASSERT_FALSE (multiple_p (ph::make (-31, 0, 0), + ph::make (-6, 0, 0), + &multiple)); + ASSERT_TRUE (multiple_p (ph::make (-30, 0, 0), + ph::make (-6, 0, 0), + &multiple)); + ASSERT_MUST_EQ (multiple, ch::make (5)); + ASSERT_FALSE (multiple_p (ph::make (-29, 0, 0), + ph::make (-6, 0, 0), + &multiple)); +} + +/* Test exact_div for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_exact_div () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test exact_div (T, C). */ + ASSERT_MUST_EQ (exact_div (ph::make (-45, 36, 0), 9), + ph::make (-5, 4, 0)); + ASSERT_MUST_EQ (exact_div (ph::make (63, 0, -14), -7), + ph::make (-9, 0, 2)); + ASSERT_MUST_EQ (exact_div (ph::make (-120, 0, 0), -12), + ch::make (10)); + ASSERT_MUST_EQ (exact_div (ph::make (-120, -24, 12), 12), + ph::make (-10, -2, 1)); + + /* Test exact_div (T, T). */ + ASSERT_MUST_EQ (exact_div (ph::make (-40, 80, -200), + ph::make (2, -4, 10)), + ch::make (-20)); + ASSERT_MUST_EQ (exact_div (ph::make (-30, 0, 0), + ph::make (-6, 0, 0)), + ch::make (5)); +} + +/* Test the form of can_div_trunc_p that returns a constant, for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_can_div_trunc_p_const () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test can_div_trunc_p (T, C) -> C. 
*/ + C const_quot; + ASSERT_TRUE (can_div_trunc_p (ph::make (-31, 0, 0), 10, &const_quot)); + ASSERT_MUST_EQ (const_quot, -3); + ASSERT_TRUE (can_div_trunc_p (ph::make (-29, 0, 0), 10, &const_quot)); + ASSERT_MUST_EQ (const_quot, -2); + + /* Test can_div_trunc_p (T, T) -> C. */ + ASSERT_TRUE (can_div_trunc_p (ph::make (-10, 25, -15), + ph::make (2, -5, 3), + &const_quot)); + ASSERT_EQ (const_quot, -5); + /* (-5 + 2x) / (-3 + 2x) != 1 when x == 1. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-5, 2, 0), + ph::make (-3, 2, 0), + &const_quot), N == 1); + ASSERT_EQ (const_quot, N == 1 ? 1 : -5); + /* Similarly for the third coefficient. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-5, -5, 2), + ph::make (-3, -3, 2), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, N <= 2 ? 1 : -5); + /* (-15 + 3x) / (-12 + 2x) != 1 when x == 7. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-15, 3, 0), + ph::make (-12, 2, 0), + &const_quot), N == 1); + ASSERT_EQ (const_quot, N <= 2 ? 1 : -5); + ASSERT_TRUE (can_div_trunc_p (ph::make (-21, -18, -14), + ph::make (5, 4, 3), + &const_quot)); + ASSERT_EQ (const_quot, -4); + ASSERT_TRUE (can_div_trunc_p (ph::make (18, 9, 13), + ph::make (-8, -4, -5), + &const_quot)); + ASSERT_EQ (const_quot, -2); + + /* Test can_div_trunc_p (T, T) -> C, T. */ + T rem; + ASSERT_TRUE (can_div_trunc_p (ph::make (-10, 25, -15), + ph::make (2, -5, 3), + &const_quot, &rem)); + ASSERT_EQ (const_quot, -5); + ASSERT_MUST_EQ (rem, ch::make (0)); + /* (-5 + 2x) / (-3 + 2x) != 1 when x == 1. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-5, 2, 0), + ph::make (-3, 2, 0), + &const_quot, &rem), N == 1); + ASSERT_MUST_EQ (const_quot, N == 1 ? 1 : -5); + if (N == 1) + ASSERT_MUST_EQ (rem, ch::make (-2)); + /* Similarly for the third coefficient. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-5, -5, 2), + ph::make (-3, -3, 2), + &const_quot, &rem), N <= 2); + ASSERT_MUST_EQ (const_quot, N <= 2 ? 
1 : -5); + if (N <= 2) + ASSERT_MUST_EQ (rem, ph::make (-2, -2, 0)); + /* (-15 + 3x) / (-12 + 2x) != 1 when x == 7. */ + ASSERT_EQ (can_div_trunc_p (ph::make (-15, 3, 0), + ph::make (-12, 2, 0), + &const_quot, &rem), N == 1); + ASSERT_MUST_EQ (const_quot, N <= 2 ? 1 : -5); + if (N == 1) + ASSERT_MUST_EQ (rem, ch::make (-3)); + ASSERT_TRUE (can_div_trunc_p (ph::make (-21, -18, -14), + ph::make (5, 4, 3), + &const_quot, &rem)); + ASSERT_EQ (const_quot, -4); + ASSERT_MUST_EQ (rem, ph::make (-1, -2, -2)); + ASSERT_TRUE (can_div_trunc_p (ph::make (18, 9, 13), + ph::make (-8, -4, -5), + &const_quot, &rem)); + ASSERT_EQ (const_quot, -2); + ASSERT_MUST_EQ (rem, ph::make (2, 1, 3)); +} + +/* Test the form of can_div_trunc_p that returns a poly_int, for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_can_div_trunc_p_poly () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test can_div_trunc_p (T, C) -> T. */ + T quot; + ASSERT_TRUE (can_div_trunc_p (ph::make (-99, 0, 0), 10, &quot)); + ASSERT_MUST_EQ (quot, ch::make (-9)); + ASSERT_TRUE (can_div_trunc_p (ph::make (7, -63, 81), 9, &quot)); + ASSERT_MUST_EQ (quot, ph::make (0, -7, 9)); + ASSERT_TRUE (can_div_trunc_p (ph::make (15, 44, -55), -11, &quot)); + ASSERT_MUST_EQ (quot, ph::make (-1, -4, 5)); + ASSERT_EQ (can_div_trunc_p (ph::make (-63, -24, -17), -8, &quot), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (quot, ph::make (7, 3, 0)); + ASSERT_EQ (can_div_trunc_p (ph::make (40, 48, 70), -7, &quot), N == 1); + if (N == 1) + ASSERT_MUST_EQ (quot, ch::make (-5)); + + /* Test can_div_trunc_p (T, C) -> T, C. 
 */ + C const_rem; + ASSERT_TRUE (can_div_trunc_p (ph::make (-99, 0, 0), 10, + &quot, &const_rem)); + ASSERT_MUST_EQ (quot, ch::make (-9)); + ASSERT_EQ (const_rem, -9); + ASSERT_TRUE (can_div_trunc_p (ph::make (7, -63, 81), 9, + &quot, &const_rem)); + ASSERT_MUST_EQ (quot, ph::make (0, -7, 9)); + ASSERT_EQ (const_rem, 7); + ASSERT_TRUE (can_div_trunc_p (ph::make (15, 44, -55), -11, + &quot, &const_rem)); + ASSERT_MUST_EQ (quot, ph::make (-1, -4, 5)); + ASSERT_EQ (const_rem, 4); + ASSERT_EQ (can_div_trunc_p (ph::make (-63, -24, -17), -8, + &quot, &const_rem), N <= 2); + if (N <= 2) + { + ASSERT_MUST_EQ (quot, ph::make (7, 3, 0)); + ASSERT_EQ (const_rem, -7); + } + ASSERT_EQ (can_div_trunc_p (ph::make (40, 48, 70), -7, + &quot, &const_rem), N == 1); + if (N == 1) + { + ASSERT_MUST_EQ (quot, ch::make (-5)); + ASSERT_EQ (const_rem, 5); + } +} + +/* Test can_div_away_from_zero_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_can_div_away_from_zero_p () +{ + typedef poly_helper<T> ph; + + /* Test can_div_away_from_zero_p (T, T) -> C. */ + C const_quot; + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (-10, 25, -15), + ph::make (2, -5, 3), + &const_quot)); + ASSERT_EQ (const_quot, -5); + /* (-5 + 2x) / (-3 + 2x) != 1 when x == 1. */ + ASSERT_EQ (can_div_away_from_zero_p (ph::make (-5, 2, 0), + ph::make (-3, 2, 0), + &const_quot), N == 1); + ASSERT_EQ (const_quot, N == 1 ? 2 : -5); + /* Similarly for the third coefficient. */ + ASSERT_EQ (can_div_away_from_zero_p (ph::make (-5, -5, 2), + ph::make (-3, -3, 2), + &const_quot), N <= 2); + ASSERT_EQ (const_quot, N <= 2 ? 2 : -5); + /* (-15 + 3x) / (-12 + 2x) != 1 when x == 7. */ + ASSERT_EQ (can_div_away_from_zero_p (ph::make (-15, 3, 0), + ph::make (-12, 2, 0), + &const_quot), N == 1); + ASSERT_EQ (const_quot, N <= 2 ? 
2 : -5); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (-21, -18, -14), + ph::make (5, 4, 3), + &const_quot)); + ASSERT_EQ (const_quot, -5); + ASSERT_TRUE (can_div_away_from_zero_p (ph::make (18, 9, 13), + ph::make (-8, -4, -5), + &const_quot)); + ASSERT_EQ (const_quot, -3); +} + +/* Test maybe_in_range_p for signed C. */ + +template<unsigned int N, typename C, typename T> +static void +test_signed_maybe_in_range_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_EQ (maybe_in_range_p (ch::make (4), + ph::make (5, 1, -2), + ph::make (-1, -1, -1)), N == 3); + ASSERT_EQ (maybe_in_range_p (ch::make (4), + ph::make (5, -1, 2), + ph::make (-1, -1, -1)), N >= 2); +} + +/* Test may_le for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_may_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_le (T, C). */ + ASSERT_FALSE (may_le (ph::make (3, 5, -1), ch::make (2))); + ASSERT_FALSE (may_le (ph::make (40, -10, 60), ch::make (15))); + ASSERT_FALSE (may_le (ph::make (-14, 0, 0), ch::make (13))); + + /* Test may_le (C, T). */ + ASSERT_EQ (may_le (ch::make (4), ph::make (3, 5, -1)), N >= 2); + ASSERT_EQ (may_le (ch::make (41), ph::make (40, -10, 60)), N >= 2); + ASSERT_FALSE (may_le (ch::make (-15), ph::make (11, 0, 0))); + + /* Test may_le (T, T). */ + ASSERT_EQ (may_le (ph::make (-2, 4, -2), + ph::make (-3, -5, -1)), N >= 2); + ASSERT_EQ (may_le (ph::make (-2, -6, 0), + ph::make (-3, 0, 100)), N == 3); + ASSERT_FALSE (may_le (ph::make (-2, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test may_lt for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_may_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_lt (T, C). 
*/ + ASSERT_FALSE (may_lt (ph::make (3, 5, -1), ch::make (2))); + ASSERT_FALSE (may_lt (ph::make (40, -10, 60), ch::make (15))); + ASSERT_FALSE (may_lt (ph::make (-18, 0, 0), ch::make (18))); + ASSERT_FALSE (may_lt (ph::make (-2, -2, -2), ch::make (-2))); + + /* Test may_lt (C, T). */ + ASSERT_EQ (may_lt (ch::make (4), ph::make (3, 5, -1)), N >= 2); + ASSERT_EQ (may_lt (ch::make (41), ph::make (40, -10, 60)), N >= 2); + ASSERT_FALSE (may_lt (ch::make (-45), ph::make (40, 0, 0))); + ASSERT_EQ (may_lt (ch::make (-2), ph::make (-2, -2, -2)), N >= 2); + + /* Test may_lt (T, T). */ + ASSERT_EQ (may_lt (ph::make (-3, 4, -2), + ph::make (-3, -5, -1)), N >= 2); + ASSERT_EQ (may_lt (ph::make (-3, -6, 0), + ph::make (-3, 0, 100)), N == 3); + ASSERT_FALSE (may_lt (ph::make (-3, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test may_ge for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_may_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_ge (T, C). */ + ASSERT_EQ (may_ge (ph::make (3, 5, -1), ch::make (4)), N >= 2); + ASSERT_EQ (may_ge (ph::make (40, -10, 60), ch::make (41)), N >= 2); + ASSERT_FALSE (may_ge (ph::make (11, 0, 0), ch::make (-15))); + + /* Test may_ge (C, T). */ + ASSERT_FALSE (may_ge (ch::make (2), ph::make (3, 5, -1))); + ASSERT_FALSE (may_ge (ch::make (15), ph::make (40, -10, 60))); + ASSERT_FALSE (may_ge (ch::make (13), ph::make (-14, 0, 0))); + + /* Test may_ge (T, T). */ + ASSERT_EQ (may_ge (ph::make (-3, -5, -1), + ph::make (-2, 4, -2)), N >= 2); + ASSERT_EQ (may_ge (ph::make (-3, 0, 100), + ph::make (-2, -6, 0)), N == 3); + ASSERT_FALSE (may_ge (ph::make (-3, 4, 0), + ph::make (-2, 5, 1))); +} + +/* Test may_gt for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_may_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test may_gt (T, C). 
*/ + ASSERT_EQ (may_gt (ph::make (3, 5, -1), ch::make (4)), N >= 2); + ASSERT_EQ (may_gt (ph::make (40, -10, 60), ch::make (41)), N >= 2); + ASSERT_FALSE (may_gt (ph::make (40, 0, 0), ch::make (-45))); + ASSERT_EQ (may_gt (ph::make (-2, -2, -2), ch::make (-2)), N >= 2); + + /* Test may_gt (C, T). */ + ASSERT_FALSE (may_gt (ch::make (2), ph::make (3, 5, -1))); + ASSERT_FALSE (may_gt (ch::make (15), ph::make (40, -10, 60))); + ASSERT_FALSE (may_gt (ch::make (18), ph::make (-18, 0, 0))); + ASSERT_FALSE (may_gt (ch::make (-2), ph::make (-2, -2, -2))); + + /* Test may_gt (T, T). */ + ASSERT_EQ (may_gt (ph::make (-3, -5, -1), + ph::make (-3, 4, -2)), N >= 2); + ASSERT_EQ (may_gt (ph::make (-3, 0, 100), + ph::make (-3, -6, 0)), N == 3); + ASSERT_FALSE (may_gt (ph::make (-3, 4, 0), + ph::make (-3, 5, 1))); +} + +/* Test must_gt for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_must_gt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_gt (T, C). */ + ASSERT_TRUE (must_gt (ph::make (3, 5, -1), ch::make (2))); + ASSERT_TRUE (must_gt (ph::make (40, -10, 60), ch::make (15))); + ASSERT_TRUE (must_gt (ph::make (-14, 0, 0), ch::make (13))); + + /* Test must_gt (C, T). */ + ASSERT_EQ (must_gt (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_EQ (must_gt (ch::make (41), ph::make (40, -10, 60)), N == 1); + ASSERT_TRUE (must_gt (ch::make (-15), ph::make (11, 0, 0))); + + /* Test must_gt (T, T). */ + ASSERT_EQ (must_gt (ph::make (-2, 4, -2), + ph::make (-3, -5, -1)), N == 1); + ASSERT_EQ (must_gt (ph::make (-2, -6, 0), + ph::make (-3, 0, 100)), N <= 2); + ASSERT_TRUE (must_gt (ph::make (-2, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test must_ge for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_must_ge () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_ge (T, C). 
*/ + ASSERT_TRUE (must_ge (ph::make (3, 5, -1), ch::make (2))); + ASSERT_TRUE (must_ge (ph::make (40, -10, 60), ch::make (15))); + ASSERT_TRUE (must_ge (ph::make (-18, 0, 0), ch::make (18))); + ASSERT_TRUE (must_ge (ph::make (-2, -2, -2), ch::make (-2))); + + /* Test must_ge (C, T). */ + ASSERT_EQ (must_ge (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_EQ (must_ge (ch::make (41), ph::make (40, -10, 60)), N == 1); + ASSERT_TRUE (must_ge (ch::make (-45), ph::make (40, 0, 0))); + ASSERT_EQ (must_ge (ch::make (-2), ph::make (-2, -2, -2)), N == 1); + + /* Test must_ge (T, T). */ + ASSERT_EQ (must_ge (ph::make (-3, 4, -2), + ph::make (-3, -5, -1)), N == 1); + ASSERT_EQ (must_ge (ph::make (-3, -6, 0), + ph::make (-3, 0, 100)), N <= 2); + ASSERT_TRUE (must_ge (ph::make (-3, 5, 1), + ph::make (-3, 4, 0))); +} + +/* Test must_lt for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_must_lt () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_lt (T, C). */ + ASSERT_EQ (must_lt (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_EQ (must_lt (ph::make (40, -10, 60), ch::make (41)), N == 1); + ASSERT_TRUE (must_lt (ph::make (11, 0, 0), ch::make (-15))); + + /* Test must_lt (C, T). */ + ASSERT_TRUE (must_lt (ch::make (2), ph::make (3, 5, -1))); + ASSERT_TRUE (must_lt (ch::make (15), ph::make (40, -10, 60))); + ASSERT_TRUE (must_lt (ch::make (13), ph::make (-14, 0, 0))); + + /* Test must_lt (T, T). */ + ASSERT_EQ (must_lt (ph::make (-3, -5, -1), + ph::make (-2, 4, -2)), N == 1); + ASSERT_EQ (must_lt (ph::make (-3, 0, 100), + ph::make (-2, -6, 0)), N <= 2); + ASSERT_TRUE (must_lt (ph::make (-3, 4, 0), + ph::make (-2, 5, 1))); +} + +/* Test must_le for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_must_le () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test must_le (T, C). 
*/ + ASSERT_EQ (must_le (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_EQ (must_le (ph::make (40, -10, 60), ch::make (41)), N == 1); + ASSERT_TRUE (must_le (ph::make (40, 0, 0), ch::make (-45))); + ASSERT_EQ (must_le (ph::make (-2, -2, -2), ch::make (-2)), N == 1); + + /* Test must_le (C, T). */ + ASSERT_TRUE (must_le (ch::make (2), ph::make (3, 5, -1))); + ASSERT_TRUE (must_le (ch::make (15), ph::make (40, -10, 60))); + ASSERT_TRUE (must_le (ch::make (18), ph::make (-18, 0, 0))); + ASSERT_TRUE (must_le (ch::make (-2), ph::make (-2, -2, -2))); + + /* Test must_le (T, T). */ + ASSERT_EQ (must_le (ph::make (-3, -5, -1), + ph::make (-3, 4, -2)), N == 1); + ASSERT_EQ (must_le (ph::make (-3, 0, 100), + ph::make (-3, -6, 0)), N <= 2); + ASSERT_TRUE (must_le (ph::make (-3, 4, 0), + ph::make (-3, 5, 1))); +} + +/* Test ordered_p for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_ordered_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_p (T, C). */ + ASSERT_EQ (ordered_p (ph::make (3, 5, -1), ch::make (4)), N == 1); + ASSERT_TRUE (ordered_p (ph::make (3, 5, -1), ch::make (3))); + ASSERT_TRUE (ordered_p (ph::make (3, 5, -1), ch::make (2))); + ASSERT_EQ (ordered_p (ph::make (40, -10, 60), ch::make (41)), N == 1); + ASSERT_TRUE (ordered_p (ph::make (40, -10, 60), ch::make (40))); + ASSERT_TRUE (ordered_p (ph::make (40, -10, 60), ch::make (39))); + ASSERT_TRUE (ordered_p (ph::make (4, -4, -4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (4, 0, -4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (4, 4, -4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, 4, 4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, 0, 4), ch::make (0))); + ASSERT_TRUE (ordered_p (ph::make (-4, -4, 4), ch::make (0))); + + /* Test ordered_p (C, T). 
*/ + ASSERT_EQ (ordered_p (ch::make (4), ph::make (3, 5, -1)), N == 1); + ASSERT_TRUE (ordered_p (ch::make (3), ph::make (3, 5, -1))); + ASSERT_TRUE (ordered_p (ch::make (2), ph::make (3, 5, -1))); + ASSERT_EQ (ordered_p (ch::make (41), ph::make (40, -10, 60)), N == 1); + ASSERT_TRUE (ordered_p (ch::make (40), ph::make (40, -10, 60))); + ASSERT_TRUE (ordered_p (ch::make (39), ph::make (40, -10, 60))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, -4, -4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, 0, -4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (4, 4, -4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, 4, 4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, 0, 4))); + ASSERT_TRUE (ordered_p (ch::make (0), ph::make (-4, -4, 4))); +} + +/* Test ordered_min for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_ordered_min () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_min (T, C). */ + ASSERT_MUST_EQ (ordered_min (ph::make (5, -12, -14), ch::make (5)), + ch::make (5)); + + /* Test ordered_min (C, T). */ + ASSERT_MUST_EQ (ordered_min (ch::make (9), ph::make (9, -90, -77)), + ch::make (9)); + + /* Test ordered_min (T, T). */ + ASSERT_MUST_EQ (ordered_min (ph::make (4, 9, 17), ph::make (4, -1, 17)), + ph::make (4, 9, 17)); +} + +/* Test ordered_max for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_ordered_max () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test ordered_max (T, C). */ + ASSERT_MUST_EQ (ordered_max (ph::make (5, -12, -14), ch::make (5)), + ph::make (5, -12, -14)); + + /* Test ordered_max (C, T). */ + ASSERT_MUST_EQ (ordered_max (ch::make (9), ph::make (9, -90, -77)), + ph::make (9, -90, -77)); + + /* Test ordered_max (T, T). 
*/ + ASSERT_MUST_EQ (ordered_max (ph::make (4, 9, 17), ph::make (4, -1, 17)), + ph::make (4, -1, 17)); +} + +/* Test lower_bound for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_lower_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test lower_bound (T, C). */ + ASSERT_MUST_EQ (lower_bound (ph::make (4, -1, 3), ch::make (5)), + ch::make (4)); + ASSERT_MUST_EQ (lower_bound (ph::make (6, 5, -14), ch::make (-11)), + ch::make (6)); + + /* Test lower_bound (C, T). */ + ASSERT_MUST_EQ (lower_bound (ch::make (5), ph::make (4, -1, 3)), + ch::make (4)); + ASSERT_MUST_EQ (lower_bound (ch::make (-11), ph::make (6, 5, -14)), + ch::make (6)); + + /* Test lower_bound (T, T). */ + ASSERT_MUST_EQ (lower_bound (ph::make (4, -1, 3), ph::make (5, 7, -2)), + ph::make (4, 7, 3)); + ASSERT_MUST_EQ (lower_bound (ph::make (6, 5, -14), ph::make (-11, 4, 3)), + ph::make (6, 4, 3)); +} + +/* Test upper_bound for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_upper_bound () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test upper_bound (T, C). */ + ASSERT_MUST_EQ (upper_bound (ph::make (4, -1, 3), ch::make (5)), + ph::make (5, -1, 3)); + ASSERT_MUST_EQ (upper_bound (ph::make (6, 5, -14), ch::make (-11)), + ph::make (-11, 5, -14)); + + /* Test upper_bound (C, T). */ + ASSERT_MUST_EQ (upper_bound (ch::make (5), ph::make (4, -1, 3)), + ph::make (5, -1, 3)); + ASSERT_MUST_EQ (upper_bound (ch::make (-11), ph::make (6, 5, -14)), + ph::make (-11, 5, -14)); + + /* Test upper_bound (T, T). */ + ASSERT_MUST_EQ (upper_bound (ph::make (4, -1, 3), ph::make (5, 7, -2)), + ph::make (5, -1, -2)); + ASSERT_MUST_EQ (upper_bound (ph::make (6, 5, -14), ph::make (-11, 4, 3)), + ph::make (-11, 5, -14)); +} + +/* Test maybe_in_range_p for unsigned C. 
*/ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_maybe_in_range_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Unknown size for N == 1. */ + ASSERT_TRUE (maybe_in_range_p (ch::make (-1), + ch::make (0), + ph::make (-1, -1, -1))); + /* Unknown size for all N. */ + ASSERT_TRUE (maybe_in_range_p (ph::make (-1, -1, -1), + ch::make (0), + ch::make (-1))); + /* Unknown size for N == 1. */ + ASSERT_EQ (maybe_in_range_p (ph::make (-1, -1, -1), + ch::make (0), + ph::make (-1, -1, -1)), N == 1); + ASSERT_EQ (maybe_in_range_p (ch::make (-2), + ch::make (0), + ph::make (-2, -2, -2)), N >= 2); + ASSERT_FALSE (maybe_in_range_p (ph::make (-2, -2, -2), + ch::make (0), + ch::make (-2))); + ASSERT_FALSE (maybe_in_range_p (ph::make (-2, -2, -2), + ch::make (0), + ph::make (-2, -2, -2))); + ASSERT_TRUE (maybe_in_range_p (ph::make (-2, -2, -2), + ch::make (1), + ph::make (-2, -2, -2))); + ASSERT_TRUE (maybe_in_range_p (ph::make (-2, -2, -2), + ch::make (1), + ch::make (-2))); +} + +/* Test known_in_range_p for unsigned C. */ + +template<unsigned int N, typename C, typename T> +static void +test_unsigned_known_in_range_p () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + ASSERT_FALSE (known_in_range_p (ch::make (4), + ph::make (5, 1, 2), + ch::make (-2))); + ASSERT_TRUE (known_in_range_p (ph::make (6, 1, 2), + ph::make (5, 1, 2), + ch::make (-2))); + ASSERT_TRUE (known_in_range_p (ph::make (6, 1, 2), + ph::make (5, 1, 2), + ph::make (-2, -2, -2))); +} + +/* Test things that work for poly_int-based types T, given that the + coefficient type C is a primitive integer type. N is the number of + coefficients in C */ + +template<unsigned int N, typename C, typename T> +static void +test_hwi () +{ + typedef coeff_helper<C> ch; + typedef poly_helper<T> ph; + + /* Test coeff_gcd. */ + ASSERT_EQ (coeff_gcd (ph::make (30, 45, 10)), + N == 1 ? 30 : N == 2 ? 
15 : 5); + ASSERT_EQ (coeff_gcd (ph::make (0, 18, 21)), + N == 1 ? 0 : N == 2 ? 18 : 3); + ASSERT_EQ (coeff_gcd (ph::make (0, 0, 101)), + N <= 2 ? 0 : 101); + ASSERT_EQ (coeff_gcd (ph::make (21, 0, 28)), + N <= 2 ? 21 : 7); + ASSERT_EQ (coeff_gcd (ph::make (100, 175, 0)), + N == 1 ? 100 : 25); + + /* Test common_multiple (T, C). */ + ASSERT_MUST_EQ (common_multiple (ph::make (8, 24, 16), 6), + ph::make (24, 72, 48)); + ASSERT_MUST_EQ (common_multiple (ph::make (30, 0, 0), 45), + ch::make (90)); + if (N >= 2) + ASSERT_MUST_EQ (common_multiple (ph::make (18, 15, 0), 12), + ph::make (72, 60, 0)); + if (N == 3) + ASSERT_MUST_EQ (common_multiple (ph::make (18, 15, 4), 12), + ph::make (216, 180, 48)); + + /* Test common_multiple (C, T). */ + ASSERT_MUST_EQ (common_multiple (6, ph::make (8, 24, 16)), + ph::make (24, 72, 48)); + ASSERT_MUST_EQ (common_multiple (45, ph::make (30, 0, 0)), + ch::make (90)); + if (N >= 2) + ASSERT_MUST_EQ (common_multiple (12, ph::make (18, 15, 0)), + ph::make (72, 60, 0)); + if (N == 3) + ASSERT_MUST_EQ (common_multiple (12, ph::make (18, 15, 4)), + ph::make (216, 180, 48)); + + /* Test force_common_multiple. 
*/ + ASSERT_MUST_EQ (force_common_multiple (ph::make (30, 0, 0), + ph::make (25, 0, 0)), + ph::make (150, 0, 0)); + if (N >= 2) + { + ASSERT_MUST_EQ (force_common_multiple (ph::make (16, 24, 0), + ph::make (24, 36, 0)), + ph::make (48, 72, 0)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (16, 24, 0), + ph::make (12, 0, 0)), + ph::make (48, 72, 0)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (15, 0, 0), + ph::make (21, 9, 0)), + ph::make (105, 45, 0)); + } + if (N == 3) + { + ASSERT_MUST_EQ (force_common_multiple (ph::make (33, 99, 66), + ph::make (22, 66, 44)), + ph::make (66, 198, 132)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (30, 0, 45), + ph::make (12, 0, 18)), + ph::make (60, 0, 90)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (40, 0, 50), + ph::make (8, 0, 0)), + ph::make (160, 0, 200)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (6, 0, 0), + ph::make (10, 0, 15)), + ph::make (60, 0, 90)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (20, 40, 30), + ph::make (15, 0, 0)), + ph::make (60, 120, 90)); + ASSERT_MUST_EQ (force_common_multiple (ph::make (9, 0, 0), + ph::make (90, 81, 27)), + ph::make (90, 81, 27)); + } +} + +/* Test poly_int<N, C>::to_shwi, using in-range source coefficient value + SRCV (equal to DESTV) and adding DELTA to get an out-of-range value. */ + +template<unsigned int N, typename C> +static void +test_to_shwi (const C &srcv, int delta, HOST_WIDE_INT destv) +{ + typedef poly_helper< poly_int<N, HOST_WIDE_INT> > ps64h; + typedef poly_int<N, C> T; + typedef poly_helper<T> ph; + poly_int<N, HOST_WIDE_INT> shwi; + + /* Test in-range T::to_shwi. */ + ASSERT_TRUE (ph::make (srcv, + srcv - delta, + srcv - delta * 2).to_shwi (&shwi)); + ASSERT_MUST_EQ (shwi, ps64h::make (destv, + destv - delta, + destv - delta * 2)); + + /* Test partially in-range T::to_shwi. 
*/ + ASSERT_EQ (ph::make (srcv, + srcv + delta, + srcv + delta * 2).to_shwi (&shwi), N == 1); + if (N == 1) + ASSERT_MUST_EQ (shwi, destv); + ASSERT_EQ (ph::make (srcv - delta, + srcv, + srcv + delta).to_shwi (&shwi), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (shwi, ps64h::make (destv - delta, + destv, + destv /* ignored */)); + + /* Test fully out-of-range T::to_shwi. */ + ASSERT_FALSE (ph::make (srcv + delta, srcv, srcv).to_shwi (&shwi)); +} + +/* Test poly_int<N, C>::to_uhwi, using in-range source coefficient value + SRCV (equal to DESTV) and adding DELTA to get an out-of-range value. */ + +template<unsigned int N, typename C> +static void +test_to_uhwi (const C &srcv, int delta, unsigned HOST_WIDE_INT destv) +{ + typedef poly_helper< poly_int<N, unsigned HOST_WIDE_INT> > pu64h; + typedef poly_int<N, C> T; + typedef poly_helper<T> ph; + poly_int<N, unsigned HOST_WIDE_INT> uhwi; + + /* Test in-range T::to_uhwi. */ + ASSERT_TRUE (ph::make (srcv, + srcv - delta, + srcv - delta * 2).to_uhwi (&uhwi)); + ASSERT_MUST_EQ (uhwi, pu64h::make (destv, + destv - delta, + destv - delta * 2)); + + /* Test partially in-range T::to_uhwi. */ + ASSERT_EQ (ph::make (srcv, + srcv + delta, + srcv + delta * 2).to_uhwi (&uhwi), N == 1); + if (N == 1) + ASSERT_MUST_EQ (uhwi, destv); + ASSERT_EQ (ph::make (srcv - delta, + srcv, + srcv + delta).to_uhwi (&uhwi), N <= 2); + if (N <= 2) + ASSERT_MUST_EQ (uhwi, pu64h::make (destv - delta, + destv, + destv /* ignored */)); + + /* Test fully out-of-range T::to_uhwi. */ + ASSERT_FALSE (ph::make (srcv + delta, srcv, srcv).to_uhwi (&uhwi)); +} + +/* Test poly_int<N, C>::force_shwi and poly_int<N, C>::force_uhwi, given + that MASK66 has the low 66 bits set and the rest clear. 
*/ + +template<unsigned int N, typename C> +static void +test_force_hwi (const C &mask66) +{ + typedef poly_helper< poly_int<N, HOST_WIDE_INT> > ps64h; + typedef poly_helper< poly_int<N, unsigned HOST_WIDE_INT> > pu64h; + typedef poly_int<N, C> T; + typedef poly_helper<T> ph; + poly_int<N, HOST_WIDE_INT> shwi; + poly_int<N, unsigned HOST_WIDE_INT> uhwi; + + C mask65 = wi::arshift (mask66, 1); + C mask64 = wi::arshift (mask66, 2); + C mask63 = wi::arshift (mask66, 3); + C mask62 = wi::arshift (mask66, 4); + C mask61 = wi::arshift (mask66, 5); + + /* Test force_shwi. */ + ASSERT_MUST_EQ (ph::make (mask66, mask65, mask64).force_shwi (), + ps64h::make (HOST_WIDE_INT_M1, + HOST_WIDE_INT_M1, + HOST_WIDE_INT_M1)); + ASSERT_MUST_EQ (ph::make (mask65, mask64, mask63).force_shwi (), + ps64h::make (HOST_WIDE_INT_M1, + HOST_WIDE_INT_M1, + HOST_WIDE_INT_MAX)); + ASSERT_MUST_EQ (ph::make (mask64, mask63, mask62).force_shwi (), + ps64h::make (HOST_WIDE_INT_M1, + HOST_WIDE_INT_MAX, + HOST_WIDE_INT_MAX / 2)); + ASSERT_MUST_EQ (ph::make (mask63, mask62, mask61).force_shwi (), + ps64h::make (HOST_WIDE_INT_MAX, + HOST_WIDE_INT_MAX / 2, + HOST_WIDE_INT_MAX / 4)); + + /* Test force_uhwi. */ + ASSERT_MUST_EQ (ph::make (mask66, mask65, mask64).force_uhwi (), + pu64h::make (HOST_WIDE_INT_M1U, + HOST_WIDE_INT_M1U, + HOST_WIDE_INT_M1U)); + ASSERT_MUST_EQ (ph::make (mask65, mask64, mask63).force_uhwi (), + pu64h::make (HOST_WIDE_INT_M1U, + HOST_WIDE_INT_M1U, + HOST_WIDE_INT_M1U >> 1)); + ASSERT_MUST_EQ (ph::make (mask64, mask63, mask62).force_uhwi (), + pu64h::make (HOST_WIDE_INT_M1U, + HOST_WIDE_INT_M1U >> 1, + HOST_WIDE_INT_M1U >> 2)); + ASSERT_MUST_EQ (ph::make (mask63, mask62, mask61).force_uhwi (), + pu64h::make (HOST_WIDE_INT_M1U >> 1, + HOST_WIDE_INT_M1U >> 2, + HOST_WIDE_INT_M1U >> 3)); +} + +/* Test poly_int<N, wide_int>::from. 
*/ + +template<unsigned int N> +static void +test_wide_int_from () +{ + typedef poly_helper< poly_int<N, unsigned char> > pu8h; + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + /* Test narrowing cases of T::from. */ + T p_8_3_1 = ph::make (wi::uhwi (8, 3), + wi::uhwi (3, 3), + wi::uhwi (1, 3)); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 3, SIGNED), + p_8_3_1); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 3, UNSIGNED), + p_8_3_1); + + /* Test equal-sized cases of T::from. */ + T p_f8_23_81 = ph::make (wi::uhwi (0xf8, 8), + wi::uhwi (0x23, 8), + wi::uhwi (0x81, 8)); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 8, SIGNED), + p_f8_23_81); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 8, UNSIGNED), + p_f8_23_81); + + /* Test widening cases of T::from. */ + T p_fff8_0023_ff81 = ph::make (wi::uhwi (0xfff8, 16), + wi::uhwi (0x0023, 16), + wi::uhwi (0xff81, 16)); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 16, SIGNED), + p_fff8_0023_ff81); + T p_00f8_0023_0081 = ph::make (wi::uhwi (0xf8, 16), + wi::uhwi (0x23, 16), + wi::uhwi (0x81, 16)); + ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 16, UNSIGNED), + p_00f8_0023_0081); +} + +/* Test wi::sext for poly_int<N, wide_int>. */ + +template<unsigned int N> +static void +test_wide_int_sext () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (wi::sext (ph::make (wi::shwi (16, 12), + wi::shwi (63, 12), + wi::shwi (14, 12)), 5), + ph::make (wi::shwi (-16, 12), + wi::shwi (-1, 12), + wi::shwi (14, 12))); + ASSERT_MUST_EQ (wi::sext (ph::make (wi::shwi (1024, 12), + wi::shwi (1023, 12), + wi::shwi (1200, 12)), 11), + ph::make (wi::shwi (-1024, 12), + wi::shwi (1023, 12), + wi::shwi (-848, 12))); +} + +/* Test wi::zext for poly_int<N, wide_int>. 
*/ + +template<unsigned int N> +static void +test_wide_int_zext () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + ASSERT_MUST_EQ (wi::zext (ph::make (wi::uhwi (16, 12), + wi::uhwi (63, 12), + wi::uhwi (14, 12)), 5), + ph::make (wi::uhwi (16, 12), + wi::uhwi (31, 12), + wi::uhwi (14, 12))); + ASSERT_MUST_EQ (wi::zext (ph::make (wi::uhwi (1024, 12), + wi::uhwi (1023, 12), + wi::uhwi (3248, 12)), 11), + ph::make (wi::uhwi (1024, 12), + wi::uhwi (1023, 12), + wi::uhwi (1200, 12))); +} + +/* Test wi::add for poly_int<N, wide_int>. */ + +template<unsigned int N> +static void +test_wide_int_add () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + bool overflow; + ASSERT_MUST_EQ (wi::add (ph::make (wi::uhwi (15, 4), + wi::uhwi (4, 4), + wi::uhwi (2, 4)), + ph::make (wi::uhwi (1, 4), + wi::uhwi (0, 4), + wi::uhwi (0, 4)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (0, 4), + wi::uhwi (4, 4), + wi::uhwi (2, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::add (ph::make (wi::uhwi (30, 5), + wi::uhwi (6, 5), + wi::uhwi (11, 5)), + ph::make (wi::uhwi (1, 5), + wi::uhwi (26, 5), + wi::uhwi (19, 5)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (31, 5), + wi::uhwi (0, 5), + wi::uhwi (30, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::add (ph::make (wi::uhwi (1, 6), + wi::uhwi (63, 6), + wi::uhwi (50, 6)), + ph::make (wi::uhwi (61, 6), + wi::uhwi (0, 6), + wi::uhwi (50, 6)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (62, 6), + wi::uhwi (63, 6), + wi::uhwi (36, 6))); + ASSERT_EQ (overflow, N == 3); + + ASSERT_MUST_EQ (wi::add (ph::make (wi::shwi (7, 4), + wi::shwi (7, 4), + wi::shwi (-8, 4)), + ph::make (wi::shwi (1, 4), + wi::shwi (0, 4), + wi::shwi (0, 4)), + SIGNED, &overflow), + ph::make (wi::shwi (-8, 4), + wi::shwi (7, 4), + wi::shwi (-8, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::add (ph::make (wi::shwi (-1, 5), + wi::shwi (6, 5), + wi::shwi (11, 5)), + ph::make (wi::shwi (15, 5), + wi::shwi (11, 
5), + wi::shwi (-15, 5)), + SIGNED, &overflow), + ph::make (wi::shwi (14, 5), + wi::shwi (-15, 5), + wi::shwi (-4, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::add (ph::make (wi::shwi (4, 6), + wi::shwi (0, 6), + wi::shwi (-1, 6)), + ph::make (wi::shwi (-32, 6), + wi::shwi (-32, 6), + wi::shwi (-32, 6)), + SIGNED, &overflow), + ph::make (wi::shwi (-28, 6), + wi::shwi (-32, 6), + wi::shwi (31, 6))); + ASSERT_EQ (overflow, N == 3); +} + +/* Test wi::sub for poly_int<N, wide_int>. */ + +template<unsigned int N> +static void +test_wide_int_sub () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + bool overflow; + ASSERT_MUST_EQ (wi::sub (ph::make (wi::uhwi (0, 4), + wi::uhwi (4, 4), + wi::uhwi (2, 4)), + ph::make (wi::uhwi (1, 4), + wi::uhwi (0, 4), + wi::uhwi (0, 4)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (15, 4), + wi::uhwi (4, 4), + wi::uhwi (2, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::sub (ph::make (wi::uhwi (30, 5), + wi::uhwi (29, 5), + wi::uhwi (11, 5)), + ph::make (wi::uhwi (1, 5), + wi::uhwi (31, 5), + wi::uhwi (9, 5)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (29, 5), + wi::uhwi (30, 5), + wi::uhwi (2, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::sub (ph::make (wi::uhwi (0, 6), + wi::uhwi (63, 6), + wi::uhwi (0, 6)), + ph::make (wi::uhwi (0, 6), + wi::uhwi (0, 6), + wi::uhwi (52, 6)), + UNSIGNED, &overflow), + ph::make (wi::uhwi (0, 6), + wi::uhwi (63, 6), + wi::uhwi (12, 6))); + ASSERT_EQ (overflow, N == 3); + + ASSERT_MUST_EQ (wi::sub (ph::make (wi::shwi (-8, 4), + wi::shwi (5, 4), + wi::shwi (-7, 4)), + ph::make (wi::shwi (1, 4), + wi::shwi (0, 4), + wi::shwi (0, 4)), + SIGNED, &overflow), + ph::make (wi::shwi (7, 4), + wi::shwi (5, 4), + wi::shwi (-7, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::sub (ph::make (wi::shwi (-1, 5), + wi::shwi (-7, 5), + wi::shwi (0, 5)), + ph::make (wi::shwi (15, 5), + wi::shwi (11, 5), + wi::shwi (-15, 5)), + SIGNED, &overflow), + ph::make 
(wi::shwi (-16, 5), + wi::shwi (14, 5), + wi::shwi (15, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::sub (ph::make (wi::shwi (-32, 6), + wi::shwi (-1, 6), + wi::shwi (0, 6)), + ph::make (wi::shwi (-32, 6), + wi::shwi (-32, 6), + wi::shwi (-32, 6)), + SIGNED, &overflow), + ph::make (wi::shwi (0, 6), + wi::shwi (31, 6), + wi::shwi (-32, 6))); + ASSERT_EQ (overflow, N == 3); +} + +/* Test wi::mul for poly_int<N, wide_int>. */ + +template<unsigned int N> +static void +test_wide_int_mul () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + bool overflow; + ASSERT_MUST_EQ (wi::mul (ph::make (wi::uhwi (4, 4), + wi::uhwi (3, 4), + wi::uhwi (2, 4)), 4, + UNSIGNED, &overflow), + ph::make (wi::uhwi (0, 4), + wi::uhwi (12, 4), + wi::uhwi (8, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::mul (ph::make (wi::uhwi (15, 5), + wi::uhwi (31, 5), + wi::uhwi (7, 5)), 2, + UNSIGNED, &overflow), + ph::make (wi::uhwi (30, 5), + wi::uhwi (30, 5), + wi::uhwi (14, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::mul (ph::make (wi::uhwi (1, 6), + wi::uhwi (0, 6), + wi::uhwi (2, 6)), 63, + UNSIGNED, &overflow), + ph::make (wi::uhwi (63, 6), + wi::uhwi (0, 6), + wi::uhwi (62, 6))); + ASSERT_EQ (overflow, N == 3); + + ASSERT_MUST_EQ (wi::mul (ph::make (wi::shwi (-1, 4), + wi::shwi (1, 4), + wi::shwi (0, 4)), -8, + SIGNED, &overflow), + ph::make (wi::shwi (-8, 4), + wi::shwi (-8, 4), + wi::shwi (0, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::mul (ph::make (wi::shwi (2, 5), + wi::shwi (-3, 5), + wi::shwi (1, 5)), 6, + SIGNED, &overflow), + ph::make (wi::shwi (12, 5), + wi::shwi (14, 5), + wi::shwi (6, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::mul (ph::make (wi::shwi (5, 6), + wi::shwi (-6, 6), + wi::shwi (7, 6)), -5, + SIGNED, &overflow), + ph::make (wi::shwi (-25, 6), + wi::shwi (30, 6), + wi::shwi (29, 6))); + ASSERT_EQ (overflow, N == 3); +} + +/* Test wi::neg for poly_int<N, wide_int>. 
*/ + +template<unsigned int N> +static void +test_wide_int_neg () +{ + typedef poly_int<N, wide_int> T; + typedef poly_helper<T> ph; + + bool overflow; + ASSERT_MUST_EQ (wi::neg (ph::make (wi::shwi (-8, 4), + wi::shwi (7, 4), + wi::shwi (-7, 4)), &overflow), + ph::make (wi::shwi (-8, 4), + wi::shwi (-7, 4), + wi::shwi (7, 4))); + ASSERT_TRUE (overflow); + ASSERT_MUST_EQ (wi::neg (ph::make (wi::shwi (-15, 5), + wi::shwi (-16, 5), + wi::shwi (15, 5)), &overflow), + ph::make (wi::shwi (15, 5), + wi::shwi (-16, 5), + wi::shwi (-15, 5))); + ASSERT_EQ (overflow, N >= 2); + ASSERT_MUST_EQ (wi::neg (ph::make (wi::shwi (-28, 6), + wi::shwi (30, 6), + wi::shwi (-32, 6)), &overflow), + ph::make (wi::shwi (28, 6), + wi::shwi (-30, 6), + wi::shwi (-32, 6))); + ASSERT_EQ (overflow, N == 3); +} + +/* Test poly_int<N, C> for things that only make sense when C is an + offset_int or widest_int. */ + +template<unsigned int N, typename C> +static void +test_fixed_int (void) +{ + typedef poly_helper< poly_int<N, int> > pih; + typedef poly_int<N, C> T; + typedef poly_helper<T> ph; + + /* Test signed case. */ + ASSERT_MUST_EQ (T::from (pih::make (-100, 200, -300), SIGNED), + ph::make (-100, 200, -300)); + ASSERT_MAY_NE (T::from (pih::make (-100, 200, -300), SIGNED), + ph::make (-100U, 200U, -300U)); + + /* Test unsigned case. */ + ASSERT_MAY_NE (T::from (pih::make (-100, 200, -300), UNSIGNED), + ph::make (-100, 200, -300)); + ASSERT_MUST_EQ (T::from (pih::make (-100, 200, -300), UNSIGNED), + ph::make (-100U, 200U, -300U)); + + C one = 1; + test_to_shwi<N> (-(one << 63), -1, HOST_WIDE_INT_MIN); + test_to_shwi<N> ((one << 63) - 1, 1, HOST_WIDE_INT_MAX); + test_to_uhwi<N> (C (0), -1, 0U); + test_to_uhwi<N> ((one << 64) - 1, 1, HOST_WIDE_INT_M1U); + + /* Test force_shwi and force_uhwi. */ + test_force_hwi<N> ((one << 66) - 1); +} + +/* Test type promotions. 
*/ + +template<unsigned int N> +static void +test_type_promotions () +{ + typedef poly_helper< poly_int<N, unsigned short> > pu16h; + typedef poly_helper< poly_int<N, HOST_WIDE_INT> > ps64h; + HOST_WIDE_INT mask32 = ~0U; + + /* Test that + on unsigned short promotes to HOST_WIDE_INT. */ + ASSERT_MUST_EQ (pu16h::make (0xffff, 0xfffe, 0xfffd) + 16, + ps64h::make (0x1000f, 0xfffe, 0xfffd)); + ASSERT_MUST_EQ (32 + pu16h::make (0xffff, 0xfffe, 0xfffd), + ps64h::make (0x1001f, 0xfffe, 0xfffd)); + ASSERT_MUST_EQ (pu16h::make (0xffff, 0xfffe, 0xfffd) + + pu16h::make (4, 10, 17), + ps64h::make (0x10003, 0x10008, 0x1000e)); + + /* Test that - on unsigned short promotes to HOST_WIDE_INT. */ + ASSERT_MUST_EQ (pu16h::make (1, 2, 3) - ~0U, + ps64h::make (-mask32 + 1, 2, 3)); + ASSERT_MUST_EQ (INT_MIN - pu16h::make (4, 5, 6), + ps64h::make ((HOST_WIDE_INT) INT_MIN - 4, -5, -6)); + ASSERT_MUST_EQ (pu16h::make (1, 2, 3) - pu16h::make (100, 200, 300), + ps64h::make (-99, -198, -297)); + + /* Same for unary -. */ + ASSERT_MUST_EQ (-pu16h::make (0x8000, 0x9000, 0xa000), + ps64h::make (-0x8000, -0x9000, -0xa000)); + ASSERT_MAY_NE (-pu16h::make (0x8000, 0x9000, 0xa000), + ps64h::make (0x8000, 0x9000, 0xa000)); + + /* Test that * on unsigned short promotes to HOST_WIDE_INT. */ + ASSERT_MUST_EQ (pu16h::make (10, 14, 17) * ~0U, + ps64h::make (10 * mask32, 14 * mask32, 17 * mask32)); + ASSERT_MUST_EQ (-400000 * pu16h::make (10, 14, 17), + ps64h::make (-4000000, -5600000, -6800000)); + + /* Test that << on unsigned short promotes to HOST_WIDE_INT. */ + ASSERT_MUST_EQ (pu16h::make (4, 5, 6) << 50, + ps64h::make ((HOST_WIDE_INT) 4 << 50, + (HOST_WIDE_INT) 5 << 50, + (HOST_WIDE_INT) 6 << 50)); + + /* Test that can_align_up doesn't truncate to the type of the alignment. 
*/ + poly_int<N, HOST_WIDE_INT> aligned; + HOST_WIDE_INT a = (HOST_WIDE_INT_1 << 50); + HOST_WIDE_INT b = (HOST_WIDE_INT_1 << 51); + HOST_WIDE_INT c = (HOST_WIDE_INT_1 << 52); + ASSERT_TRUE (can_align_up (ps64h::make (a - 31, b, c), 16U, &aligned)); + ASSERT_MUST_EQ (aligned, ps64h::make (a - 16, b, c)); + + /* Likewise for can_align_down. */ + ASSERT_TRUE (can_align_down (ps64h::make (a - 31, b, c), 16U, &aligned)); + ASSERT_MUST_EQ (aligned, ps64h::make (a - 32, b, c)); + + /* Same for the force_* routines. */ + ASSERT_MUST_EQ (force_align_up (ps64h::make (a - 31, b, c), 16U), + ps64h::make (a - 16, b, c)); + ASSERT_MUST_EQ (force_align_down (ps64h::make (a - 31, b, c), 16U), + ps64h::make (a - 32, b, c)); + + /* Same for the aligned_*_bound routines. */ + ASSERT_MUST_EQ (aligned_upper_bound (ps64h::make (a - 31, b - 33, c - 55), + 16U), + ps64h::make (a - 16, b - 32, c - 48)); + ASSERT_MUST_EQ (aligned_lower_bound (ps64h::make (a - 31, b - 33, c - 55), + 16U), + ps64h::make (a - 32, b - 48, c - 64)); +} + +/* Test endpoint_representable_p. */ + +static void +test_endpoint_representable (void) +{ + /* True because the size is unknown. */ + ASSERT_TRUE (endpoint_representable_p ((unsigned char) 0x80, + (unsigned char) 0xff)); + ASSERT_FALSE (endpoint_representable_p ((unsigned char) 0x80, + (unsigned char) 0xfe)); + ASSERT_FALSE (endpoint_representable_p ((unsigned char) 0x80, + (unsigned char) 0x80)); + ASSERT_TRUE (endpoint_representable_p ((unsigned char) 0x80, + (unsigned char) 0x7f)); + ASSERT_FALSE (endpoint_representable_p ((unsigned char) 0x11, + (unsigned char) 0xef)); + ASSERT_TRUE (endpoint_representable_p ((unsigned char) 0x11, + (unsigned char) 0xee)); + + /* True because the size is unknown. 
*/ + ASSERT_TRUE (endpoint_representable_p (INT_MAX, -1)); + ASSERT_FALSE (endpoint_representable_p (INT_MAX - 100, INT_MAX)); + ASSERT_FALSE (endpoint_representable_p (INT_MAX - 100, 101)); + ASSERT_TRUE (endpoint_representable_p (INT_MAX - 100, 100)); + ASSERT_TRUE (endpoint_representable_p (0, INT_MAX)); + ASSERT_TRUE (endpoint_representable_p (INT_MIN, INT_MAX)); + + /* True because the size is unknown. */ + ASSERT_TRUE (endpoint_representable_p (UINT_MAX, -1U)); + ASSERT_FALSE (endpoint_representable_p (UINT_MAX - 400, UINT_MAX - 1)); + ASSERT_FALSE (endpoint_representable_p (UINT_MAX - 400, 401U)); + ASSERT_TRUE (endpoint_representable_p (UINT_MAX - 400, 400U)); +} + +/* Test wi::shwi with N coefficients. */ + +template<unsigned int N> +static void +test_shwi () +{ + typedef poly_int<N, wi::hwi_with_prec> T; + typedef poly_helper<T> ph; + + poly_int<N, wide_int> mult; + mult = ph::make (wi::shwi (80, 16), + wi::shwi (-10, 16), + wi::shwi (70, 16)) * 3; + ASSERT_MUST_EQ (mult, ph::make (wi::shwi (240, 16), + wi::shwi (-30, 16), + wi::shwi (210, 16))); +} + +/* Test wi::uhwi with N coefficients. */ + +template<unsigned int N> +static void +test_uhwi () +{ + typedef poly_int<N, wi::hwi_with_prec> T; + typedef poly_helper<T> ph; + + poly_int<N, wide_int> mult; + mult = ph::make (wi::uhwi (80, 16), + wi::uhwi (-10, 16), + wi::uhwi (70, 16)) * 3; + ASSERT_MUST_EQ (mult, ph::make (wi::uhwi (240, 16), + wi::uhwi (-30, 16), + wi::uhwi (210, 16))); +} + +/* Test known_size_p for non-polynomial T. */ + +template<typename T> +static void +test_nonpoly_known_size_p () +{ + ASSERT_TRUE (known_size_p (T (0))); + ASSERT_TRUE (known_size_p (T (1))); + ASSERT_TRUE (known_size_p (T (2))); + ASSERT_FALSE (known_size_p (T (-1))); +} + +/* Test poly-int.h operations on non-polynomial type T. */ + +template<typename T> +static void +test_nonpoly_type () +{ + test_nonpoly_known_size_p<T> (); +} + +/* Test poly-int.h operations on non-polynomial values. 
*/ + +static void +test_nonpoly () +{ + test_nonpoly_type<unsigned char> (); + test_nonpoly_type<unsigned short> (); + test_nonpoly_type<int> (); + test_nonpoly_type<unsigned int> (); + test_nonpoly_type<HOST_WIDE_INT> (); + test_nonpoly_type<unsigned HOST_WIDE_INT> (); + test_nonpoly_type<offset_int> (); + test_nonpoly_type<widest_int> (); +} + +/* Test things that work for all poly_int-based types T, given that T + has N coefficients of type C. RC is the type to which C promotes + after an operator. */ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_general () +{ + test_poly_int_traits<N, C, T> (); + test_constants<N, C, T> (); + test_plus_equals<N, C, T> (); + test_minus_equals<N, C, T> (); + test_times_equals<N, C, T> (); + test_shl_equals<N, C, T> (); + test_is_constant<N, C, T> (); + test_to_constant<N, C, T> (); + test_addition<N, C, T> (); + test_subtraction<N, C, RC, T> (); + test_negation<N, C, RC, T> (); + test_multiplication<N, C, T> (); + test_shift_left<N, C, T> (); + test_may_ne<N, C, T> (); + test_must_eq<N, C, T> (); + test_can_align_p<N, C, T> (); + test_can_align_up<N, C, T> (); + test_can_align_down<N, C, T> (); + test_known_equal_after_align_up<N, C, T> (); + test_known_equal_after_align_down<N, C, T> (); + test_force_align_up<N, C, T> (); + test_force_align_down<N, C, T> (); + test_aligned_lower_bound<N, C, T> (); + test_aligned_upper_bound<N, C, T> (); + test_known_misalignment<N, C, T> (); + test_force_get_misalignment<N, C, T> (); + test_known_alignment<N, C, T> (); + test_can_ior_p<N, C, T> (); + test_known_size_p<N, C, T> (); +} + +/* Test things that work for poly_int<2, C>, given that C is signed. */ + +template<typename C> +static void +test_ordered_2 () +{ + test_may_eq_2<C> (); + test_must_ne_2<C> (); +} + +/* Test things that work for poly_int-based types T, given that the + coefficient type C supports all the normal C operators. 
N is the + number of coefficients in C and RC is the type to which C promotes + after an operator. */ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_ordered () +{ + test_general<N, C, RC, T> (); + test_may_le<N, C, T> (); + test_may_lt<N, C, T> (); + test_may_ge<N, C, T> (); + test_may_gt<N, C, T> (); + test_must_gt<N, C, T> (); + test_must_ge<N, C, T> (); + test_must_lt<N, C, T> (); + test_must_le<N, C, T> (); + test_ordered_p<N, C, T> (); + test_ordered_min<N, C, T> (); + test_ordered_max<N, C, T> (); + test_constant_lower_bound<N, C, T> (); + test_lower_bound<N, C, T> (); + test_upper_bound<N, C, T> (); + test_compare_sizes_for_sort<N, C, T> (); + test_force_align_up_and_div<N, C, T> (); + test_force_align_down_and_div<N, C, T> (); + test_constant_multiple_p<N, C, T> (); + test_multiple_p<N, C, T> (); + test_multiple_p_with_result<N, C, T> (); + test_exact_div<N, C, T> (); + test_can_div_trunc_p_const<N, C, T> (); + test_can_div_trunc_p_poly<N, C, T> (); + test_can_div_away_from_zero_p<N, C, T> (); + test_maybe_in_range_p<N, C, T> (); + test_known_in_range_p<N, C, T> (); + test_ranges_may_overlap_p<N, C, T> (); + test_ranges_must_overlap_p<N, C, T> (); + test_known_subrange_p<N, C, T> (); + test_coeffs_in_range_p<N, C, T> (); +} + +/* Test things that work for poly_int<2, C>, given that C is signed. */ + +template<typename C> +static void +test_signed_2 () +{ + test_ordered_2<C> (); + test_signed_may_eq_2<C> (); + test_signed_must_ne_2<C> (); +} + +/* Test things that work for poly_int-based types T, given that the + coefficient type C is signed. N is the number of coefficients in C + and RC is the type to which C promotes after an operator. 
*/ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_signed () +{ + test_ordered<N, C, RC, T> (); + test_signed_negation<N, C, RC, T> (); + test_signed_may_le<N, C, T> (); + test_signed_may_lt<N, C, T> (); + test_signed_may_ge<N, C, T> (); + test_signed_may_gt<N, C, T> (); + test_signed_must_gt<N, C, T> (); + test_signed_must_ge<N, C, T> (); + test_signed_must_lt<N, C, T> (); + test_signed_must_le<N, C, T> (); + test_signed_ordered_p<N, C, T> (); + test_signed_ordered_min<N, C, T> (); + test_signed_ordered_max<N, C, T> (); + test_signed_lower_bound<N, C, T> (); + test_signed_upper_bound<N, C, T> (); + test_signed_constant_multiple_p<N, C, T> (); + test_signed_multiple_p<N, C, T> (); + test_signed_multiple_p_with_result<N ,C, T> (); + test_signed_exact_div<N, C, T> (); + test_signed_can_div_trunc_p_const<N, C, T> (); + test_signed_can_div_trunc_p_poly<N, C, T> (); + test_signed_can_div_away_from_zero_p<N, C, T> (); + test_signed_maybe_in_range_p<N, C, T> (); +} + +/* Test things that work for poly_int-based types T, given that the + coefficient type C is unsigned. N is the number of coefficients in C + and RC is the type to which C promotes after an operator. 
*/ + +template<unsigned int N, typename C, typename RC, typename T> +static void +test_unsigned () +{ + test_ordered<N, C, RC, T> (); + test_unsigned_may_le<N, C, T> (); + test_unsigned_may_lt<N, C, T> (); + test_unsigned_may_ge<N, C, T> (); + test_unsigned_may_gt<N, C, T> (); + test_unsigned_must_gt<N, C, T> (); + test_unsigned_must_ge<N, C, T> (); + test_unsigned_must_lt<N, C, T> (); + test_unsigned_must_le<N, C, T> (); + test_unsigned_ordered_p<N, C, T> (); + test_unsigned_ordered_min<N, C, T> (); + test_unsigned_ordered_max<N, C, T> (); + test_unsigned_lower_bound<N, C, T> (); + test_unsigned_upper_bound<N, C, T> (); + test_unsigned_maybe_in_range_p<N, C, T> (); + test_unsigned_known_in_range_p<N, C, T> (); +} + +/* Test things that are specific to coefficients of type wide_int, + using a poly_int with N coefficients. */ + +template<unsigned int N> +static void +test_wide_int () +{ + test_wide_int_from<N> (); + + test_to_shwi<N> (wi::mask (63, true, 77), -1, HOST_WIDE_INT_MIN); + test_to_shwi<N> (wi::mask (63, false, 77), 1, HOST_WIDE_INT_MAX); + test_to_uhwi<N> (wide_int (wi::zero (94)), -1, 0U); + test_to_uhwi<N> (wi::mask (64, false, 94), 1, HOST_WIDE_INT_M1U); + + test_force_hwi<N> (wi::mask (66, false, 81)); + + test_wide_int_sext<N> (); + test_wide_int_zext<N> (); + test_wide_int_add<N> (); + test_wide_int_sub<N> (); + test_wide_int_mul<N> (); + test_wide_int_neg<N> (); +} + +/* Run the tests that are common to all coefficient counts N. 
*/ + +template<unsigned int N> +static void +test_num_coeffs_core () +{ + test_unsigned<N, unsigned short, HOST_WIDE_INT, + poly_int<N, unsigned short> > (); + test_signed<N, HOST_WIDE_INT, HOST_WIDE_INT, + poly_int<N, HOST_WIDE_INT> > (); + test_unsigned<N, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, + poly_int<N, unsigned HOST_WIDE_INT> >(); + + test_general<N, wide_int, wide_int, poly_int<N, wide_int> > (); + + test_hwi<N, unsigned short, poly_int<N, unsigned short> > (); + test_hwi<N, HOST_WIDE_INT, poly_int<N, HOST_WIDE_INT> > (); + test_hwi<N, unsigned HOST_WIDE_INT, poly_int<N, unsigned HOST_WIDE_INT> > (); + + test_wide_int<N> (); + test_fixed_int<N, offset_int> (); + test_fixed_int<N, widest_int> (); + + test_type_promotions<N> (); + test_shwi<N> (); + test_uhwi<N> (); +} + +/* Run extra tests for the most important coefficient counts N. */ + +template<unsigned int N> +static void +test_num_coeffs_extra () +{ + /* Test the most common POD types. */ + test_unsigned<N, unsigned short, HOST_WIDE_INT, + poly_int_pod<N, unsigned short> > (); + test_signed<N, HOST_WIDE_INT, HOST_WIDE_INT, + poly_int_pod<N, HOST_WIDE_INT> > (); + test_unsigned<N, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, + poly_int_pod<N, unsigned HOST_WIDE_INT> > (); + + /* Test some coefficient types that weren't covered in the core tests. */ + test_signed<N, int, HOST_WIDE_INT, + poly_int<N, int> > (); + test_signed<N, offset_int, offset_int, + poly_int<N, offset_int> > (); + test_signed<N, widest_int, widest_int, + poly_int<N, widest_int> > (); +} diff --git a/gcc/testsuite/gcc.dg/pr78768.c b/gcc/testsuite/gcc.dg/pr78768.c index b6cda47c6a8..72ac3f87129 100644 --- a/gcc/testsuite/gcc.dg/pr78768.c +++ b/gcc/testsuite/gcc.dg/pr78768.c @@ -9,7 +9,7 @@ int main (void) { char *d = (char *)__builtin_alloca (12); /* { dg-warning "argument to .alloca. 
is too large" } */ - __builtin_sprintf (d, "%32s", "x"); /* { dg-warning "directive writing 32 bytes into a region of size 12" "-Wformat-overflow" { xfail *-*-* } } */ + __builtin_sprintf (d, "%32s", "x"); /* { dg-warning "directive writing 32 bytes into a region of size 12" "-Wformat-overflow" } */ return 0; } diff --git a/gcc/testsuite/gcc.dg/sancov/cmp0.c b/gcc/testsuite/gcc.dg/sancov/cmp0.c index 3a17de91ce2..36f55aae205 100644 --- a/gcc/testsuite/gcc.dg/sancov/cmp0.c +++ b/gcc/testsuite/gcc.dg/sancov/cmp0.c @@ -1,6 +1,7 @@ /* Basic test on number of inserted callbacks. */ /* { dg-do compile } */ /* { dg-options "-fsanitize-coverage=trace-cmp -fdump-tree-optimized" } */ +/* { dg-skip-if "" { aarch64*-*-* } { "-O0" } { "" } } */ void foo (char *a, short *b, int *c, long long *d, float *e, double *f) diff --git a/gcc/testsuite/gcc.dg/tree-prof/section-attr-1.c b/gcc/testsuite/gcc.dg/tree-prof/section-attr-1.c index ee6662ea6e5..96b88e0fc9a 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/section-attr-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/section-attr-1.c @@ -2,8 +2,12 @@ all later functions from being partitioned into hot and cold blocks. 
*/ /* { dg-require-effective-target freorder } */ /* { dg-options "-O2 -fno-profile-reorder-functions -freorder-blocks-and-partition -save-temps" } */ +/* { dg-options "-O2 -fno-profile-reorder-functions -freorder-blocks-and-partition -save-temps -DN=20000" { target simulator } } */ #define SIZE 10000 +#ifndef N +#define N 1000000 +#endif #define NOINLINE __attribute__((noinline)) __attribute__ ((noclone)) @@ -20,7 +24,7 @@ main (int argc, char *argv[]) int i; buf_hot = "hello"; buf_cold = "world"; - for (i = 0; i < 1000000; i++) + for (i = 0; i < N; i++) foo (argc); return 0; } diff --git a/gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c b/gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c index 6a9af083a9a..414dc092c0d 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c @@ -37,4 +37,4 @@ int main () } /* autofdo cannot do that precise execution numbers */ /* { dg-final-use-not-autofdo { scan-rtl-dump-times ";; basic block\[^\\n\]*count 4000" 2 "expand"} } */ -/* { dg-final-use-not-autofdo { scan-rtl-dump-times ";; basic block\[^\\n\]*count 2000" 1 "expand"} } */ +/* { dg-final-use-not-autofdo { scan-rtl-dump-times ";; basic block\[^\\n\]*count 2000" 1 "expand" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c index f392fbeb6e6..d28029a1083 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -Warray-bounds -fno-tree-vectorize -fdump-tree-cunroll-details" } */ int a[3]; int b[4]; int diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c index dce6ad57a04..b437518487d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c @@ -19,7 +19,7 @@ int bla(void) } /* Since the loop 
is removed, there should be no addition. */ -/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */ /* The if from the loop header copying remains in the code. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c index 40a1f3a2132..36f3ea063ea 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */ struct foo {int b; int a[3];} foo; void add(struct foo *a,int l) { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr23294.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23294.c index 8f9fffb1647..f6b51907d6c 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr23294.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23294.c @@ -31,6 +31,6 @@ int f6(int a, int b) return 6*a - 2*b; } -/* { dg-final { scan-tree-dump-times "a_..D. \\\* 5" 3 "optimized" } } */ -/* { dg-final { scan-tree-dump-times " \\\* 2" 3 "optimized" } } */ -/* { dg-final { scan-tree-dump-not "\\\* 6" "optimized" } } */ +/* { dg-final { scan-tree-dump-times "a_..D. 
\\\* 5" 3 "optimized" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times " \\\* 2" 3 "optimized" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-not "\\\* 6" "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr63586-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr63586-2.c index 0dcfe327358..578ca10fb38 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr63586-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr63586-2.c @@ -28,5 +28,5 @@ int f1_int (int x) /* { dg-final { scan-tree-dump-times "\\\* 8\\\.0e\\\+0" 1 "reassoc1" } } */ /* { dg-final { scan-tree-dump-times "\\\* 5\\\.0e\\\+0" 1 "reassoc1" } } */ -/* { dg-final { scan-tree-dump-times "\\\* 6" 1 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "\\\* 6" 1 "reassoc1" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c index e402bf92b75..6c4fca79fa4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-10.c @@ -18,5 +18,5 @@ foo (signed char s, signed char l) } /* Address of array reference is scev. */ -/* { dg-final { scan-tree-dump-times " Type:\\tADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ +/* { dg-final { scan-tree-dump-times " Type:\\tREFERENCE ADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c index 088771ebe40..c665fd7f20d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c @@ -23,4 +23,4 @@ foo (int n) } /* Address of array reference to b is scev. 
*/ -/* { dg-final { scan-tree-dump-times " Type:\\tADDRESS\n Use \[0-9\].\[0-9\]:" 2 "ivopts" } } */ +/* { dg-final { scan-tree-dump-times " Type:\\tREFERENCE ADDRESS\n Use \[0-9\].\[0-9\]:" 2 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c index c112639d4cb..f598c7b2955 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c @@ -24,4 +24,4 @@ foo (int x, int n) } /* Address of array reference to b is not scev. */ -/* { dg-final { scan-tree-dump-times " Type:\\tADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ +/* { dg-final { scan-tree-dump-times " Type:\\tREFERENCE ADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c index b11b2f1b482..2863918a687 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-9.c @@ -18,5 +18,5 @@ foo (unsigned char s, unsigned char l) } /* Address of array reference is scev. */ -/* { dg-final { scan-tree-dump-times " Type:\\tADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ +/* { dg-final { scan-tree-dump-times " Type:\\tREFERENCE ADDRESS\n Use \[0-9\].\[0-9\]:" 1 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c index a660e82bed2..98b9a73bf70 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c @@ -25,4 +25,4 @@ foo () but the loop reads only one element at a time, and DOM cannot resolve these. The same happens on powerpc depending on the SIMD support available. 
*/ -/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* } || { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* } || { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp101.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp101.c index c9feb256857..aad41f91f47 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp101.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp101.c @@ -10,4 +10,4 @@ int main () return 0; } -/* { dg-final { scan-tree-dump "<bb 2> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\[\n\r \]*return 0;" "optimized" } } */ +/* { dg-final { scan-tree-dump "<bb 2> \\\[\[0-9.\]+%\\\] \\\[count: \[0-9INV\]*\\\]:\[\n\r \]*return 0;" "optimized" { xfail aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect-opt-info-1.c b/gcc/testsuite/gcc.dg/vect-opt-info-1.c new file mode 100644 index 00000000000..913b0138085 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect-opt-info-1.c @@ -0,0 +1,11 @@ +/* { dg-options "-std=c99 -fopt-info -O3" } */ + +void +vadd (int *dst, int *op1, int *op2, int count) +{ + for (int i = 0; i < count; ++i) + dst[i] = op1[i] + op2[i]; +} + +/* { dg-message "loop vectorized" "" { target *-*-* } 6 } */ +/* { dg-message "loop versioned for vectorization because of possible aliasing" "" { target *-*-* } 6 } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-1.c index 8baba4d5f9f..f64514eeb84 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-1.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-1.c @@ -25,9 +25,7 @@ main1 (int dummy) *pout++ = *pin++; *pout++ = *pin++; *pout++ = *pin++; - /* Avoid loop vectorization. 
*/ - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } /* check results: */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-24.c b/gcc/testsuite/gcc.dg/vect/bb-slp-24.c index d0c12426c88..d5b6bfbc4de 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-24.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-24.c @@ -9,7 +9,7 @@ short src[N], dst[N]; void foo (short * __restrict__ dst, short * __restrict__ src, int h, - int stride, int dummy) + int stride) { int i; h /= 8; @@ -25,8 +25,7 @@ void foo (short * __restrict__ dst, short * __restrict__ src, int h, dst[7] += A*src[7]; dst += stride; src += stride; - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } } @@ -43,7 +42,7 @@ int main (void) src[i] = i; } - foo (dst, src, N, 8, 0); + foo (dst, src, N, 8); for (i = 0; i < N; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-25.c b/gcc/testsuite/gcc.dg/vect/bb-slp-25.c index 14314c283f0..ec31329f353 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-25.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-25.c @@ -9,7 +9,8 @@ short src[N], dst[N]; -void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) +void foo (short * __restrict__ dst, short * __restrict__ src, int h, + int stride) { int i; h /= 16; @@ -25,8 +26,7 @@ void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, dst[7] += A*src[7] + src[7+stride]; dst += 8; src += 8; - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } } @@ -43,7 +43,7 @@ int main (void) src[i] = i; } - foo (dst, src, N, 8, 0); + foo (dst, src, N, 8); for (i = 0; i < N/2; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-26.c b/gcc/testsuite/gcc.dg/vect/bb-slp-26.c index 071c25372de..91b6cacd4c5 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-26.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-26.c @@ -10,8 +10,7 @@ char src[N], dst[N]; void __attribute__((noinline,noclone)) -foo (char * __restrict__ dst, char * __restrict__ src, int h, - int stride, int dummy) +foo (char * 
__restrict__ dst, char * __restrict__ src, int h, int stride) { int i; h /= 16; @@ -27,8 +26,7 @@ foo (char * __restrict__ dst, char * __restrict__ src, int h, dst[7] += A*src[7] + src[7+stride]; dst += 8; src += 8; - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } } @@ -45,7 +43,7 @@ int main (void) src[i] = i/8; } - foo (dst, src, N, 8, 0); + foo (dst, src, N, 8); for (i = 0; i < N/2; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-29.c b/gcc/testsuite/gcc.dg/vect/bb-slp-29.c index 7a622ae44b9..747896b034b 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-29.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-29.c @@ -9,7 +9,8 @@ short src[N], dst[N]; -void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy) +void foo (short * __restrict__ dst, short * __restrict__ src, int h, + int stride) { int i; h /= 16; @@ -25,8 +26,7 @@ void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, dst[7] = A*src[7] + B*src[8]; dst += stride; src += stride; - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } } @@ -43,7 +43,7 @@ int main (void) src[i] = i; } - foo (dst, src, N, 8, 0); + foo (dst, src, N, 8); for (i = 0; i < N/2; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c index ddad85374b6..4bd286bf08c 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c @@ -28,7 +28,10 @@ int main () check_vect (); for (i = 0; i < N; i++) - a[i] = i; + { + a[i] = i; + asm volatile ("" ::: "memory"); + } foo (a, 4); @@ -42,10 +45,6 @@ int main () return 0; } -/* Basic blocks of if-converted loops are vectorized from within the loop - vectorizer pass. In this case it is really a deficiency in loop - vectorization data dependence analysis that causes us to require - basic block vectorization in the first place. 
*/ - -/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump {(no need for alias check [^\n]* when VF is 1|no alias between [^\n]* when [^\n]* is outside \(-16, 16\))} "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target vect_element_align } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c index 0e4f1a71b6b..e78dc46611e 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c @@ -59,4 +59,6 @@ int main() /* We should also be able to use 2-lane SLP to initialize the real and imaginary components in the first loop of main. */ -/* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp1" } } */ +/* For targets with gather/scatter we can vectorize the unrolled loop + directly, before SLP runs. That's probably a pessimisation though. */ +/* { dg-final { scan-tree-dump-times "basic block vectorized" 2 "slp1" { xfail vect_gather_scatter } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c index 9f1e71a3db1..85f9a02582f 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c @@ -2,11 +2,21 @@ /* { dg-additional-options "-O3" } */ /* { dg-require-effective-target vect_unpack } */ +#include "tree-vect.h" + +#if VECTOR_BITS > 512 +#define N (VECTOR_BITS * 10 / 16) +#else +#define N 320 +#endif + void foo(unsigned *p1, unsigned short *p2) { int n; - for (n = 0; n < 320; n++) + for (n = 0; n < N; n++) p1[n] = p2[n * 2]; } -/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" } } */ +/* Disable for SVE because for long or variable-length vectors we don't + get an unrolled epilogue loop. 
*/ +/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c b/gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c index 7c7acd5bab6..99ecb793973 100644 --- a/gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c +++ b/gcc/testsuite/gcc.dg/vect/fast-math-slp-38.c @@ -18,4 +18,5 @@ foo (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* Requires VF <= 4. */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { aarch64_sve && { ! vect256 } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c index def8efb20c4..a9a8b864e66 100644 --- a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c +++ b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c @@ -34,4 +34,4 @@ int main (void) } /* Requires fast-math. */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! 
vect_ieee_add_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c index 224148d2b1e..f56bd2e50af 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 256 +#define N (VECTOR_BITS / 16) +#else #define N 16 +#endif unsigned short in[N]; unsigned short coef[N]; diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c index ccebb711306..fe9e7e7ab40 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c index 3bc73077b1f..50b4998bb6c 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 256 +#define N (VECTOR_BITS / 16 + 10) +#else #define N 26 +#endif __attribute__ ((noinline)) unsigned int main1 () diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c index c7b8b0eeff2..c3b242157ce 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c @@ -4,7 +4,12 @@ #include <stdarg.h> #include "tree-vect.h" +/* N / 2 bytes has to be worth vectorizing even with peeling. 
*/ +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 4 / 8) +#else #define N 64 +#endif struct t{ int k[N]; @@ -89,4 +94,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-36.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-36.c index 0fb9594ea16..7663ca7281a 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-36.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-36.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 8) +#else #define N 32 +#endif struct { char ca[N]; diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c index fc52c92b89a..470bbfb5537 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c @@ -4,12 +4,18 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 16 +#if VECTOR_BITS > 128 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 4 +#endif + +#define N (NINTS * 4) int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; -int ia[N][4][N+1]; -int ic[N][N][3][2*N+2]; -int id[N][N][N+4]; +int ia[N][NINTS][N + 1]; +int ic[N][N][NINTS - 1][2 * N + 2]; +int id[N][N][N + NINTS]; __attribute__ ((noinline)) int main1 () @@ -85,4 +91,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" } 
} */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c index 1458ba60426..f7ad45b4f7a 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c @@ -5,7 +5,13 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 24 +#if VECTOR_BITS > 128 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 4 +#endif + +#define N (NINTS * 6) struct s{ int m; @@ -19,8 +25,7 @@ struct s2{ struct test1{ struct s a; /* array a.n is unaligned */ - int b; - int c; + int pad[NINTS - 2]; struct s e; /* array e.n is aligned */ }; @@ -54,13 +59,13 @@ int main1 () } /* 2. aligned */ - for (i = 3; i < N-1; i++) + for (i = NINTS - 1; i < N - 1; i++) { tmp1[2].a.n[1][2][i] = 6; } /* check results: */ - for (i = 3; i < N-1; i++) + for (i = NINTS; i < N - 1; i++) { if (tmp1[2].a.n[1][2][i] != 6) abort (); @@ -86,18 +91,18 @@ int main1 () } /* 4. unaligned (unknown misalignment) */ - for (i = 0; i < N-4; i++) + for (i = 0; i < N - NINTS; i++) { - for (j = 0; j < N-4; j++) + for (j = 0; j < N - NINTS; j++) { tmp2[2].e.n[1][i][j] = 8; } } /* check results: */ - for (i = 0; i < N-4; i++) + for (i = 0; i < N - NINTS; i++) { - for (j = 0; j < N-4; j++) + for (j = 0; j < N - NINTS; j++) { if (tmp2[2].e.n[1][i][j] != 8) abort (); diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c index 8548f8a6f15..91eb28218bd 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c @@ -46,5 +46,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { target { ! 
vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "can't determine dependence" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c index 53a427d8d78..51f62788dbf 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c @@ -14,7 +14,6 @@ struct extraction static int a[N] = {1,2,3,4,5,6,7,8,9}; static int b[N] = {2,3,4,5,6,7,8,9,9}; -volatile int foo; __attribute__ ((noinline)) int main1 (int x, int y) { @@ -24,9 +23,8 @@ int main1 (int x, int y) { for (i = 0; i < N; i++) { - p->a[i] = a[i]; - if (foo == 135) - abort (); /* to avoid vectorization */ + p->a[i] = a[i]; + asm volatile ("" ::: "memory"); } /* Not vectorizable: distance 1. */ @@ -48,11 +46,10 @@ int main (void) { check_vect (); - foo = 0; return main1 (0, N); } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { target { ! 
vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "possible dependence between data-refs" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c index c81a93b5683..581438823fd 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c @@ -14,7 +14,6 @@ struct extraction static int a[N] = {1,2,3,4,5,6,7,8,9}; static int b[N] = {2,3,4,5,6,7,8,9,9}; -volatile int foo; __attribute__ ((noinline)) int main1 (int x, int y) { @@ -24,9 +23,8 @@ int main1 (int x, int y) { for (i = 0; i < N; i++) { - p->a[i] = a[i]; - if (foo == 135) - abort (); /* to avoid vectorization */ + p->a[i] = a[i]; + asm volatile ("" ::: "memory"); } /* Not vectorizable: distance 1. */ @@ -48,11 +46,10 @@ int main (void) { check_vect (); - foo = 0; return main1 (0, N); } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { target { ! vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "possible dependence between data-refs" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c index 739757d1f44..6f4c84b4cd2 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c @@ -59,4 +59,4 @@ int main (void) prevent vectorization on some targets. */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target { ! 
vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "can't determine dependence" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c index 8d1c01736f7..6e9ddcfa5ce 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c @@ -47,4 +47,4 @@ int main (void) prevent vectorization on some targets. */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { target { ! vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "can't determine dependence" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c index 227116306bc..1880d1edb32 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c @@ -51,4 +51,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c index cd0eb844b21..e5914d970e3 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c @@ -183,4 +183,4 @@ int main () } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" {xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr42709.c b/gcc/testsuite/gcc.dg/vect/pr42709.c index 22c541937f5..77818c4f0dd 100644 --- a/gcc/testsuite/gcc.dg/vect/pr42709.c +++ b/gcc/testsuite/gcc.dg/vect/pr42709.c @@ -9,7 +9,7 @@ int *res[N]; int -main1 (int *a, int *b, int *c, int *d, int dummy) +main1 (int *a, int *b, int *c, int *d) { int i; @@ -19,8 +19,7 @@ main1 (int *a, int *b, int *c, int *d, int dummy) res[i+1] = b + 16; res[i+2] = c + 16; res[i+3] = d + 16; - if (dummy == 32) - abort (); + asm volatile ("" ::: "memory"); } } diff --git a/gcc/testsuite/gcc.dg/vect/pr45752.c b/gcc/testsuite/gcc.dg/vect/pr45752.c index 5e5f45555c4..755205b275a 100644 --- a/gcc/testsuite/gcc.dg/vect/pr45752.c +++ b/gcc/testsuite/gcc.dg/vect/pr45752.c @@ -35,7 +35,11 @@ #define M34 7716 #define M44 16 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 5 / 32) +#else #define N 20 +#endif void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput, @@ -77,12 +81,6 @@ void foo (unsigned int *__restrict__ pInput, int main (int argc, const char* argv[]) { unsigned int input[N], output[N], i, input2[N], output2[N]; - unsigned int check_results[N] - = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, - 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619 }; - unsigned int check_results2[N] - = {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243, - 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463 }; check_vect (); @@ -95,6 +93,57 @@ int main (int argc, const char* argv[]) __asm__ volatile (""); } +#if N == 20 + unsigned int check_results[N] + = { 3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, + 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, + 31619 }; + unsigned 
int check_results2[N] + = { 7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243, + 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, + 35463 }; +#else + volatile unsigned int check_results[N]; + volatile unsigned int check_results2[N]; + + for (i = 0; i < N / 5; i++) + { + unsigned int a = input[i * 5]; + unsigned int b = input[i * 5 + 1]; + unsigned int c = input[i * 5 + 2]; + unsigned int d = input[i * 5 + 3]; + unsigned int e = input[i * 5 + 4]; + + check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e; + check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c + + M13 * d + M14 * e); + check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c + + M23 * d + M24 * e); + check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c + + M33 * d + M34 * e); + check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c + + M43 * d + M44 * e); + + a = input2[i * 5]; + b = input2[i * 5 + 1]; + c = input2[i * 5 + 2]; + d = input2[i * 5 + 3]; + e = input2[i * 5 + 4]; + + check_results2[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e; + check_results2[i * 5 + 1] = (M10 * a + M11 * b + M12 * c + + M13 * d + M14 * e); + check_results2[i * 5 + 2] = (M20 * a + M21 * b + M22 * c + + M23 * d + M24 * e); + check_results2[i * 5 + 3] = (M30 * a + M31 * b + M32 * c + + M33 * d + M34 * e); + check_results2[i * 5 + 4] = (M40 * a + M41 * b + M42 * c + + M43 * d + M44 * e); + + asm volatile ("" ::: "memory"); + } +#endif + foo (input, output, input2, output2); for (i = 0; i < N; i++) @@ -109,4 +158,4 @@ int main (int argc, const char* argv[]) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_gather_scatter && { ! 
vect_perm5_int } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr53773.c b/gcc/testsuite/gcc.dg/vect/pr53773.c index 1bee98b75d9..9f59f0e7a51 100644 --- a/gcc/testsuite/gcc.dg/vect/pr53773.c +++ b/gcc/testsuite/gcc.dg/vect/pr53773.c @@ -14,5 +14,6 @@ foo (int integral, int decimal, int power_ten) return integral+decimal; } -/* { dg-final { scan-tree-dump-times "\\* 10" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\\* 10" 2 "optimized" { target { { ! vect_fully_masked } || vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "\\* 10" 0 "optimized" { target { vect_fully_masked && { ! vect_variable_length } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65310.c b/gcc/testsuite/gcc.dg/vect/pr65310.c index 14053800ab8..57e90d45c9f 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65310.c +++ b/gcc/testsuite/gcc.dg/vect/pr65310.c @@ -18,5 +18,5 @@ int t(b *a) /* The memory access is through a pointer of type c which means *ptr is not aligned. */ -/* { dg-final { scan-tree-dump "can't force alignment" "vect" } } */ -/* { dg-final { scan-tree-dump-not "misalign = 0" "vect" } } */ +/* { dg-final { scan-tree-dump "can't force alignment" "vect" { xfail vect_element_align_preferred } } } */ +/* { dg-final { scan-tree-dump-not "misalign = 0" "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65518.c b/gcc/testsuite/gcc.dg/vect/pr65518.c index b194c9899ce..3e5b986183c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65518.c +++ b/gcc/testsuite/gcc.dg/vect/pr65518.c @@ -1,10 +1,19 @@ #include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 8
+#endif
+
+#define N (NINTS * 2)
+#define RESULT (NINTS * (NINTS - 1) / 2 * N + NINTS)
+
extern void abort (void);
typedef struct giga
{
- unsigned int g[16];
+ unsigned int g[N];
} giga;
unsigned long __attribute__((noinline,noclone))
@@ -19,17 +28,17 @@ addfst(giga const *gptr, int num) int main ()
{
- struct giga g[8];
+ struct giga g[NINTS];
unsigned int n = 1;
int i, j;
check_vect ();
- for (i = 0; i < 8; ++i)
- for (j = 0; j < 16; ++j)
+ for (i = 0; i < NINTS; ++i)
+ for (j = 0; j < N; ++j)
{
g[i].g[j] = n++;
__asm__ volatile ("");
}
- if (addfst (g, 8) != 456)
+ if (addfst (g, NINTS) != RESULT)
abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c index 9072f11a104..bb886137dfd 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c @@ -41,4 +41,4 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c index 321cb8c9211..6016cefc6a1 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c @@ -42,5 +42,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c index 8e2c46f1a6b..973105cc251 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c @@ -42,4 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! 
vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index 061777af34c..f0735072df0 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -42,4 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c index a28e80bb9fc..c118f2a1b6e 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c @@ -1,3 +1,4 @@ +/* { dg-do run { xfail { ! vect_last_reduc } } } */ /* { dg-require-effective-target vect_condition } */ #include "tree-vect.h" @@ -41,4 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction based on integer induction" 4 "vect" { target { ! vect_last_reduc } } } }*/ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c index d72fffa6720..0dbf9e5622b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c @@ -42,4 +42,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." 
"vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index 98945ba505d..ba5b9c2c76a 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -52,4 +52,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c index 695889d743b..a6f92d9757c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c @@ -41,5 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c index 04d9b38d58a..709f17f80a4 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c @@ -4,7 +4,11 @@ extern void abort (void) __attribute__ ((noreturn)); +#if VECTOR_BITS > 256 +#define N (VECTOR_BITS / 8) +#else #define N 32 +#endif /* Condition reduction where loop size is not known at compile time. Will fail to vectorize. Version inlined into main loop will vectorize. 
*/ @@ -30,6 +34,11 @@ main (void) 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 }; + for (int i = 32; i < N; ++i) + { + a[i] = 70 + (i & 3); + asm volatile ("" ::: "memory"); + } check_vect (); @@ -41,6 +50,8 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ -/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_last_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c index caa4a14120a..7a93326582b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c @@ -41,4 +41,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 4 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! 
vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c index e8f20aabbdd..8ef154d1751 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c @@ -45,5 +45,8 @@ main () return 0; } -/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_last_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_last_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB" 2 "vect" { target vect_last_reduc } } } */ +/* { dg-final { scan-tree-dump-not "Optimizing condition reduction" "vect" { target { ! vect_last_reduc } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr68445.c b/gcc/testsuite/gcc.dg/vect/pr68445.c index e2b0c01c2af..15bffdc7e05 100644 --- a/gcc/testsuite/gcc.dg/vect/pr68445.c +++ b/gcc/testsuite/gcc.dg/vect/pr68445.c @@ -16,4 +16,4 @@ void IMB_double_fast_x (int *destf, int *dest, int y, int *p1f) } } -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { xfail vect_variable_length } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr79920.c b/gcc/testsuite/gcc.dg/vect/pr79920.c index c066b91e73f..b2640e83091 100644 --- a/gcc/testsuite/gcc.dg/vect/pr79920.c +++ b/gcc/testsuite/gcc.dg/vect/pr79920.c @@ -41,4 +41,5 @@ int main() return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_double && { ! 
vect_ieee_add_reduc } } && { vect_perm && vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { vect_double && vect_ieee_add_reduc } && { vect_perm && vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr81136.c b/gcc/testsuite/gcc.dg/vect/pr81136.c index c67357684df..0ae4a4d4d5b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr81136.c +++ b/gcc/testsuite/gcc.dg/vect/pr81136.c @@ -1,6 +1,8 @@ /* { dg-do compile } */ -struct __attribute__((aligned (32))) +#include "tree-vect.h" + +struct __attribute__((aligned (VECTOR_BITS / 8))) { char misaligner; int foo[100]; diff --git a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c index 26bcf4b0d76..e3466d0da1d 100644 --- a/gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c +++ b/gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c @@ -4,7 +4,13 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 32 +#if VECTOR_BITS > 128 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 4 +#endif + +#define N (NINTS * 8) struct s{ int m; @@ -18,16 +24,15 @@ struct s2{ struct test1{ struct s a; /* array a.n is unaligned */ - int b; - int c; + int pad[NINTS - 2]; struct s e; /* array e.n is aligned */ }; struct test2{ - struct s2 a; /* array a.n is unaligned */ + struct s2 a; int b; int c; - struct s2 e; /* array e.n is aligned */ + struct s2 e; }; @@ -52,13 +57,13 @@ int main1 () } /* 2. aligned */ - for (i = 3; i < N-1; i++) + for (i = NINTS - 1; i < N - 1; i++) { tmp1[2].a.n[1][2][i] = 6; } /* check results: */ - for (i = 3; i < N-1; i++) + for (i = NINTS - 1; i < N - 1; i++) { if (tmp1[2].a.n[1][2][i] != 6) abort (); @@ -84,18 +89,18 @@ int main1 () } /* 4. 
unaligned */ - for (i = 0; i < N-4; i++) + for (i = 0; i < N - NINTS; i++) { - for (j = 0; j < N-4; j++) + for (j = 0; j < N - NINTS; j++) { tmp2[2].e.n[1][i][j] = 8; } } /* check results: */ - for (i = 0; i < N-4; i++) + for (i = 0; i < N - NINTS; i++) { - for (j = 0; j < N-4; j++) + for (j = 0; j < N - NINTS; j++) { if (tmp2[2].e.n[1][i][j] != 8) abort (); diff --git a/gcc/testsuite/gcc.dg/vect/slp-1.c b/gcc/testsuite/gcc.dg/vect/slp-1.c index aebeac84eff..26b71d65425 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-1.c @@ -118,5 +118,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-10.c b/gcc/testsuite/gcc.dg/vect/slp-10.c index 61c5d3c5119..da44f26601a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-10.c +++ b/gcc/testsuite/gcc.dg/vect/slp-10.c @@ -107,7 +107,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult }} } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! 
{vect_int_mult}}} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-13-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-13-big-array.c index 5540387b028..b553b61cc5a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-13-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-13-big-array.c @@ -4,7 +4,6 @@ #include "tree-vect.h" #define N 64 -volatile int y = 0; int main1 () @@ -18,8 +17,7 @@ main1 () for (i = 0; i < N*8; i++) { in[i] = in2[i] = i; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } /* Induction is SLPable. */ @@ -133,7 +131,8 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && { ! vect_pack_trunc } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && { { ! vect_pack_trunc } && { ! vect_gather_scatter } } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_gather_scatter } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! vect_pack_trunc } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && vect_pack_trunc } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_pack_trunc } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-13.c b/gcc/testsuite/gcc.dg/vect/slp-13.c index e7482667e2a..57dc28bafe3 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-13.c +++ b/gcc/testsuite/gcc.dg/vect/slp-13.c @@ -125,7 +125,8 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && { ! 
vect_pack_trunc } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && { { ! vect_pack_trunc } && { ! vect_gather_scatter } } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_gather_scatter } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! vect_pack_trunc } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { { vect_interleave && vect_extract_even_odd } && vect_pack_trunc } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_pack_trunc } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-15.c b/gcc/testsuite/gcc.dg/vect/slp-15.c index dbced88c98d..154347903fd 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-15.c +++ b/gcc/testsuite/gcc.dg/vect/slp-15.c @@ -112,6 +112,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! 
{ vect_int_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-16.c b/gcc/testsuite/gcc.dg/vect/slp-16.c index a7da9932c54..a19deb92552 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-16.c +++ b/gcc/testsuite/gcc.dg/vect/slp-16.c @@ -66,5 +66,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail { vect_variable_length && vect_gather_scatter } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-17.c b/gcc/testsuite/gcc.dg/vect/slp-17.c index 8beede87299..6fa11e4c53a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-17.c +++ b/gcc/testsuite/gcc.dg/vect/slp-17.c @@ -51,5 +51,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-19c.c b/gcc/testsuite/gcc.dg/vect/slp-19c.c index 82e87678503..cda6a096332 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-19c.c +++ b/gcc/testsuite/gcc.dg/vect/slp-19c.c @@ -3,14 +3,27 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 32) +#else #define N 16 +#endif int main1 () { unsigned int i; unsigned int out[N*8]; +#if N == 16 unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +#else + unsigned int in[N * 8]; + for (unsigned int i = 0; i < 
N * 8; ++i) + { + in[i] = i & 63; + asm volatile ("" ::: "memory"); + } +#endif unsigned int ia[N*2], a0, a1, a2, a3; /* The last stmt requires interleaving of not power of 2 size - not @@ -90,5 +103,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_gather } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_gather } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_gather } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_gather } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-2.c b/gcc/testsuite/gcc.dg/vect/slp-2.c index 1204e06b7d2..6c889c14469 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-2.c +++ b/gcc/testsuite/gcc.dg/vect/slp-2.c @@ -140,5 +140,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-20.c b/gcc/testsuite/gcc.dg/vect/slp-20.c index 4df47381a90..dc5eab669ea 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-20.c +++ b/gcc/testsuite/gcc.dg/vect/slp-20.c @@ -110,5 +110,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-21.c b/gcc/testsuite/gcc.dg/vect/slp-21.c index 5aef87f09cd..1f8c82e8ba8 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-21.c 
+++ b/gcc/testsuite/gcc.dg/vect/slp-21.c @@ -201,6 +201,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-22.c b/gcc/testsuite/gcc.dg/vect/slp-22.c index 394460cbb88..e2a0002ffaf 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-22.c +++ b/gcc/testsuite/gcc.dg/vect/slp-22.c @@ -129,5 +129,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c index b1fe6e4272e..88708e645d6 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-23.c +++ b/gcc/testsuite/gcc.dg/vect/slp-23.c @@ -97,8 +97,7 @@ int main (void) arr[i].f = i * 5; arr[i].g = i - 3; arr[i].h = 56; - if (arr[i].a == 178) - abort(); + asm volatile ("" ::: "memory"); } main1 (arr); @@ -108,6 +107,8 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! 
vect_perm } } } } */ +/* We fail to vectorize the second loop with variable-length SVE but + fall back to 128-bit vectors, which does use SLP. */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_perm } xfail aarch64_sve } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c index e2ecbbe9b7d..abd3a878f1a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-24-big-array.c @@ -15,7 +15,6 @@ typedef struct { unsigned char ub[N*2]; unsigned char uc[N]; -volatile int y = 0; unsigned char check_diff = 2; void @@ -69,13 +68,11 @@ int main (void) ub[i] = (i%5 == 0)?i*3:i; uc[i] = i; check_diff += (unsigned char) (ub[i] - uc[i]); - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (; i < 2*N; i++) { ub[i] = 0; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) @@ -84,8 +81,7 @@ int main (void) arr[i].b = i * 2 + 10; arr[i].c = 17; arr[i].d = i+34; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } check_vect (); diff --git a/gcc/testsuite/gcc.dg/vect/slp-24.c b/gcc/testsuite/gcc.dg/vect/slp-24.c index 29448bcd456..a45ce7de71f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-24.c +++ b/gcc/testsuite/gcc.dg/vect/slp-24.c @@ -16,8 +16,6 @@ typedef struct { unsigned char ub[N*2] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; -volatile int y = 0; - void main1 (unsigned char x, unsigned char max_result, unsigned char min_result, s *arr) { @@ -69,8 +67,7 @@ int main (void) arr[i].b = i * 2 + 10; arr[i].c = 17; arr[i].d = i+34; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } check_vect (); diff --git a/gcc/testsuite/gcc.dg/vect/slp-25.c b/gcc/testsuite/gcc.dg/vect/slp-25.c index 8ee3f1c2759..ff7eff202cb 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-25.c +++ b/gcc/testsuite/gcc.dg/vect/slp-25.c @@ -57,4 +57,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! vect_natural_alignment } } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { { ! vect_unaligned_possible } || { ! vect_natural_alignment } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-28.c b/gcc/testsuite/gcc.dg/vect/slp-28.c index e697f0efe09..4211b94ad7f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-28.c +++ b/gcc/testsuite/gcc.dg/vect/slp-28.c @@ -3,19 +3,27 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 32 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 4 / 16) +#else +#define N 32 +#endif -unsigned short in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; -unsigned short in2[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; -unsigned short in3[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; -unsigned short check[N] = {0,1,2,3,5,6,7,8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,32,33,35,36,37,38}; -unsigned short check3[N] = {0,1,2,3,4,5,6,7,8,9,10,11,5,6,7,8,9,10,11,12,13,14,15,16,10,11,12,13,14,15,16,17}; +unsigned short in[N] = {}; +unsigned short in2[N] = {}; +unsigned short in3[N] = {}; int main1 () { int i; + for (i = 0; i < N; i++) + { + in[i] = in2[i] = in3[i] = i; + asm volatile ("" ::: "memory"); + } + for (i = 0; i < N/4; i++) { 
in[i*4] = in[i*4] + 5; @@ -43,9 +51,9 @@ main1 () } /* check results: */ - for (i = 4; i < N; i++) + for (i = 0; i < N; i++) { - if (in2[i] != check[i]) + if (in2[i] != (i % 4) + (i / 4) * 5) abort (); } @@ -61,9 +69,9 @@ main1 () } /* check results: */ - for (i = 12; i < N; i++) + for (i = 0; i < N; i++) { - if (in3[i] != check3[i]) + if (in3[i] != (i % 12) + (i / 12) * 5) abort (); } @@ -80,6 +88,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-3-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-3-big-array.c index ca6c44ed98d..4cf0e7a0ece 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-3-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-3-big-array.c @@ -6,7 +6,6 @@ #define N 96 unsigned short in[N*8]; -volatile int y = 0; int main1 () @@ -17,8 +16,7 @@ main1 () for (i = 0; i < N*8; i++) { in[i] = i&63; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/slp-3.c b/gcc/testsuite/gcc.dg/vect/slp-3.c index 20933a720d1..5e40499ff96 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-3.c +++ b/gcc/testsuite/gcc.dg/vect/slp-3.c @@ -141,6 +141,8 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! vect_fully_masked } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_fully_masked } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { ! 
vect_fully_masked } } } }*/ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_fully_masked } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-33.c b/gcc/testsuite/gcc.dg/vect/slp-33.c index ad74daf5dce..2404a5f19b4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-33.c +++ b/gcc/testsuite/gcc.dg/vect/slp-33.c @@ -105,7 +105,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && {! {vect_int_mult}}} } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target {vect_uintfloat_cvt && vect_int_mult} xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && {! {vect_int_mult}}} } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-34-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-34-big-array.c index 6f794085fd5..9e9c8207f7b 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-34-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-34-big-array.c @@ -7,7 +7,6 @@ unsigned short in[N*8]; unsigned short in2[N*8]; -volatile int y = 0; int main1 () @@ -19,8 +18,7 @@ main1 () for (i = 0; i < N*8; i++) { in[i] = in2[i] = i; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } /* SLP with unrolling by 8. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/slp-35.c b/gcc/testsuite/gcc.dg/vect/slp-35.c index 1e4aaeafe83..2e8f57c7067 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-35.c +++ b/gcc/testsuite/gcc.dg/vect/slp-35.c @@ -58,8 +58,7 @@ int main (void) arr[i].c = 17; arr[i].d = i+34; arr[i].e = i * 3 + 5; - if (arr[i].a == 178) - abort(); + asm volatile ("" ::: "memory"); } main1 (arr); @@ -68,5 +67,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_variable_length && vect_gather_scatter } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-37.c b/gcc/testsuite/gcc.dg/vect/slp-37.c index 9b1d275dbfe..700ffd85f91 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-37.c +++ b/gcc/testsuite/gcc.dg/vect/slp-37.c @@ -49,9 +49,7 @@ int main (void) arr1[i].a = i; arr1[i].b = i * 2; arr1[i].c = (void *)arr1; - - if (arr1[i].a == 178) - abort(); + asm volatile ("" ::: "memory"); } @@ -60,6 +58,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_scatter } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_scatter xfail vect_variable_length } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c index 940134e440b..98ac3f1f283 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c @@ -4,7 +4,6 @@ #include "tree-vect.h" #define N 128 -volatile int y = 0; int main1 () @@ -17,8 +16,7 @@ main1 () for (i = 0; i < N*8; i++) { in[i] = i; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/slp-42.c b/gcc/testsuite/gcc.dg/vect/slp-42.c index ea5fe167cdb..6b78246c2df 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-42.c +++ b/gcc/testsuite/gcc.dg/vect/slp-42.c @@ -15,5 +15,5 @@ void foo (int n) } } -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { xfail vect_variable_length } } } */ /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-6.c b/gcc/testsuite/gcc.dg/vect/slp-6.c index a7a1d0f89cc..ec85eb77236 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-6.c @@ -116,6 +116,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! 
{ vect_int_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c index c25acb90199..1850f063eb4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c index 44ed1c835ac..a3d0670cea9 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11-big-array.c @@ -13,7 +13,6 @@ struct s }; char in[N*3]; -volatile int y = 0; __attribute__ ((noinline)) int main1 () @@ -24,8 +23,7 @@ main1 () for (i = 0; i < N; i++) { in[i] = i&127; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) @@ -57,5 +55,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c index 308153ba033..5200ed1cd94 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c @@ -49,5 +49,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c index ce4313efe20..d4c929de2ec 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c @@ -62,5 +62,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c index 28a645c7947..b1b34a89efa 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c @@ -77,5 +77,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ 
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c index faf17d6f0cd..cbfd5f538cd 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c index ee211f2d7a2..6bd16ef43b0 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c @@ -13,7 +13,11 @@ #define M12 191 #define M22 500 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 3 / 32 + 4) +#else #define N 16 +#endif void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) { @@ -34,7 +38,6 @@ void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) int main (int argc, const char* argv[]) { unsigned int input[N], output[N], i; - unsigned int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; check_vect (); @@ -45,6 +48,25 @@ int main (int argc, const char* argv[]) __asm__ volatile (""); } +#if N == 16 + unsigned int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; +#else + volatile unsigned int check_results[N] = {}; + + for (unsigned int i = 0; i < N / 3; i++) + { + unsigned int a = input[i * 3]; + unsigned int b = input[i * 3 + 1]; + unsigned int c = input[i * 3 + 2]; + + 
check_results[i * 3] = M00 * a + M01 * b + M02 * c; + check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c; + check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c; + + asm volatile ("" ::: "memory"); + } +#endif + foo (input, output); for (i = 0; i < N; i++) @@ -58,9 +80,9 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int && {! vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ -/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c index 60748926f63..3a4420c53e4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c @@ -34,7 +34,11 @@ #define M34 7716 #define M44 16 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 5 / 32) +#else #define N 20 +#endif void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) { @@ -59,20 +63,43 @@ void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) int main (int argc, const char* argv[]) { unsigned int input[N], output[N], i; - unsigned int check_results[N] - = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, - 22848, 
8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619}; check_vect (); for (i = 0; i < N; i++) { input[i] = i%256; - if (input[i] > 200) - abort(); output[i] = 0; - __asm__ volatile (""); + asm volatile ("" ::: "memory"); + } + +#if N == 20 + unsigned int check_results[N] + = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, + 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619}; +#else + volatile unsigned int check_results[N]; + + for (i = 0; i < N / 5; i++) + { + unsigned int a = input[i * 5]; + unsigned int b = input[i * 5 + 1]; + unsigned int c = input[i * 5 + 2]; + unsigned int d = input[i * 5 + 3]; + unsigned int e = input[i * 5 + 4]; + + check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e; + check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c + + M13 * d + M14 * e); + check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c + + M23 * d + M24 * e); + check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c + + M33 * d + M34 * e); + check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c + + M43 * d + M44 * e); + asm volatile (""); } +#endif foo (input, output); @@ -87,4 +114,4 @@ int main (int argc, const char* argv[]) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { { ! 
vect_perm5_int } && vect_gather_scatter } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c index 806603f7f73..52939133ca8 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c @@ -18,7 +18,11 @@ #define K01 4322 #define K11 135 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 3 / 32 + 4) +#else #define N 16 +#endif void foo (int *__restrict__ pInput, int *__restrict__ pOutput, int *__restrict__ pInput2, int *__restrict__ pOutput2) @@ -46,9 +50,7 @@ void foo (int *__restrict__ pInput, int *__restrict__ pOutput, int main (int argc, const char* argv[]) { int input[N], output[N], i; - int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; int input2[N], output2[N]; - int check_results2[N] = {4322, 135, 13776, 629, 23230, 1123, 32684, 1617, 42138, 2111, 0, 0, 0, 0, 0, 0}; check_vect (); @@ -63,6 +65,35 @@ int main (int argc, const char* argv[]) __asm__ volatile (""); } +#if N == 16 + int check_results[N] = { 1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, + 195035, 14934, 4175, 278417, 19422, 5435, 361799, + 0 }; + int check_results2[N] = { 4322, 135, 13776, 629, 23230, 1123, 32684, 1617, + 42138, 2111, 0, 0, 0, 0, 0, 0 }; +#else + volatile int check_results[N] = {}; + volatile int check_results2[N] = {}; + + for (int i = 0; i < N / 3; i++) + { + int a = input[i * 3]; + int b = input[i * 3 + 1]; + int c = input[i * 3 + 2]; + int d = input2[i * 2]; + int e = input2[i * 2 + 1]; + + check_results[i * 3] = M00 * a + M01 * b + M02 * c; + check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c; + check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c; + + check_results2[i * 2] = K00 * d + K01 * e; + check_results2[i * 2 + 1] = K10 * d + K11 * e; + + asm volatile ("" ::: "memory"); + } +#endif + foo (input, output, input2, output2); for (i = 0; i < N; i++) @@ -73,9 +104,9 @@ int main (int argc, const char* 
argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ -/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c index 0fb4a6b0fc9..4eb648ac71b 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c @@ -18,7 +18,11 @@ #define K01 4322 #define K11 135 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 3 / 32 + 4) +#else #define N 16 +#endif void foo (int *__restrict__ pInput, int *__restrict__ pOutput, int *__restrict__ pInput2, int *__restrict__ pOutput2) @@ -47,9 +51,7 @@ void foo (int *__restrict__ pInput, int *__restrict__ pOutput, int main (int argc, const char* argv[]) { int input[N], output[N], i; - int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; int input2[N], output2[N]; - int check_results2[N] = {0, 112, 810, 336, 1620, 560, 2430, 784, 3240, 1008, 0, 0, 0, 0, 0, 0}; check_vect (); @@ -62,6 +64,35 @@ int main (int argc, const char* argv[]) __asm__ volatile (""); } +#if N == 16 + int check_results[N] = { 1470, 395, 28271, 
5958, 1655, 111653, 10446, 2915, + 195035, 14934, 4175, 278417, 19422, 5435, 361799, + 0 }; + int check_results2[N] = { 0, 112, 810, 336, 1620, 560, 2430, 784, 3240, 1008, + 0, 0, 0, 0, 0, 0 }; +#else + volatile int check_results[N] = {}; + volatile int check_results2[N] = {}; + + for (int i = 0; i < N / 3; i++) + { + int a = input[i * 3]; + int b = input[i * 3 + 1]; + int c = input[i * 3 + 2]; + int d = input2[i * 2]; + int e = input2[i * 2 + 1]; + + check_results[i * 3] = M00 * a + M01 * b + M02 * c; + check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c; + check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c; + + check_results2[i * 2] = K00 * d; + check_results2[i * 2 + 1] = K10 * e; + + asm volatile ("" ::: "memory"); + } +#endif + foo (input, output, input2, output2); for (i = 0; i < N; i++) @@ -72,8 +103,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! 
vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ -/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c index a2881f9a2c4..baf7f7888a3 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c @@ -18,7 +18,11 @@ #define K01 4322 #define K11 135 +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 3 / 32 + 4) +#else #define N 16 +#endif /* SLP with load permutation and loop-based vectorization. 
*/ void foo (int *__restrict__ pInput, int *__restrict__ pOutput, @@ -45,9 +49,7 @@ void foo (int *__restrict__ pInput, int *__restrict__ pOutput, int main (int argc, const char* argv[]) { int input[N], output[N], i; - int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; int input2[N], output2[N]; - int check_results2[N] = {0, 405, 810, 1215, 1620, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; check_vect (); @@ -57,9 +59,32 @@ int main (int argc, const char* argv[]) input2[i] = i%256; output[i] = 0; output2[i] = 0; - if (input[i] > 200) - abort (); + asm volatile ("" ::: "memory"); + } + +#if N == 16 + int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0}; + int check_results2[N] = {0, 405, 810, 1215, 1620, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +#else + volatile int check_results[N] = {}; + volatile int check_results2[N] = {}; + + for (int i = 0; i < N / 3; i++) + { + int a = input[i * 3]; + int b = input[i * 3 + 1]; + int c = input[i * 3 + 2]; + int d = input2[i]; + + check_results[i * 3] = M00 * a + M01 * b + M02 * c; + check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c; + check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c; + + check_results2[i] = K00 * d; + + asm volatile ("" ::: "memory"); } +#endif foo (input, output, input2, output2); @@ -70,9 +95,9 @@ int main (int argc, const char* argv[]) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int && { ! 
vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ -/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c index f804dcc8163..94d4455dfd9 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c @@ -3,12 +3,17 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 512 +#define N (VECTOR_BITS * 6 / 16) +#else #define N 200 +#endif void __attribute__((noinline)) foo (unsigned char *__restrict__ pInput, unsigned char *__restrict__ pOutput) { - unsigned char i, a, b, c; + unsigned char a, b, c; + unsigned int i; for (i = 0; i < N / 3; i++) { @@ -24,8 +29,9 @@ foo (unsigned char *__restrict__ pInput, unsigned char *__restrict__ pOutput) int main (int argc, const char* argv[]) { - unsigned char input[N], output[N], i; + unsigned char input[N], output[N]; unsigned char check_results[N]; + unsigned int i; check_vect (); @@ -54,8 +60,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_byte && { ! 
vect_load_lanes } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ -/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm_byte && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_byte && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c index b9b5a3b87ad..b01d493b6e7 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c @@ -3,7 +3,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 512 +#define N (VECTOR_BITS * 6 / 16) +#else #define N 200 +#endif void __attribute__((noinline)) foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ pOutput) @@ -33,8 +37,7 @@ int main (int argc, const char* argv[]) { input[i] = i; output[i] = 0; - if (input[i] > 256) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N / 3; i++) @@ -54,9 +57,9 @@ int main (int argc, const char* argv[]) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { {! vect_perm } || {! vect_sizes_16B_8B } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { { vect_perm } && { vect_sizes_16B_8B } } } } } */ -/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { {! vect_perm } || {! 
vect_sizes_32B_16B } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { vect_perm } && { vect_sizes_32B_16B } } } } } */ - +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_short || vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { ! vect_perm3_short } } } } } */ +/* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { ! vect_perm3_short } || vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_short && { ! vect_load_lanes } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c index 511fff56b7d..9c8124c9b5f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c @@ -58,4 +58,4 @@ int main (void) /* The initialization loop in main also gets vectorized. */ /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si && vect_unpack } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern || { ! 
vect_unpack } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c index c41d322445a..d58e5b0fd22 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c @@ -57,5 +57,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_min_max } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_min_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_min_max || vect_variable_length } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c index d7cc6cae8b4..43d1cee9fbe 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c @@ -55,5 +55,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_add || vect_variable_length } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c index d19d42cc048..5121414260b 100644 --- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c @@ -46,5 +46,9 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "Detected reduction\\." 2 "vect" } } */ +/* 2 for the first loop. */ +/* { dg-final { scan-tree-dump-times "Detected reduction\\." 3 "vect" { target { ! vect_multiple_sizes } } } } */ +/* { dg-final { scan-tree-dump "Detected reduction\\." "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump-times "not vectorized" 1 "vect" { target { ! 
vect_multiple_sizes } } } } */ +/* { dg-final { scan-tree-dump "not vectorized" "vect" { target vect_multiple_sizes } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! vect_no_int_min_max } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/tree-vect.h b/gcc/testsuite/gcc.dg/vect/tree-vect.h index ab40e7f3c2e..69c93ac8092 100644 --- a/gcc/testsuite/gcc.dg/vect/tree-vect.h +++ b/gcc/testsuite/gcc.dg/vect/tree-vect.h @@ -75,3 +75,13 @@ check_vect (void) #endif signal (SIGILL, SIG_DFL); } + +#if defined (__ARM_FEATURE_SVE) +# if __ARM_FEATURE_SVE_BITS == 0 +# define VECTOR_BITS 1024 +# else +# define VECTOR_BITS __ARM_FEATURE_SVE_BITS +# endif +#else +# define VECTOR_BITS 128 +#endif diff --git a/gcc/testsuite/gcc.dg/vect/vect-10-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-10-big-array.c index 71ec0b80dd3..f266ac8617c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-10-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-10-big-array.c @@ -8,8 +8,6 @@ extern void abort (void); short a[N]; short d[N]; -volatile int y = 0; - int foo () { int i; @@ -19,10 +17,7 @@ int foo () { b[i] = i*3; c[i] = i; - - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Strided access pattern. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/vect-103.c b/gcc/testsuite/gcc.dg/vect/vect-103.c index e0fd1b61e02..4a9e1574eb0 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-103.c +++ b/gcc/testsuite/gcc.dg/vect/vect-103.c @@ -4,7 +4,13 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 9 +#if VECTOR_BITS > 256 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 8 +#endif + +#define N (NINTS + 1) struct extraction { @@ -14,8 +20,6 @@ struct extraction static int a[N] = {1,2,3,4,5,6,7,8,9}; static int b[N] = {17,24,7,0,2,3,4,31,82}; -static int c[N] = {9,17,24,7,0,2,3,4,31}; -volatile int foo; __attribute__ ((noinline)) int main1 (int x, int y) { @@ -25,24 +29,22 @@ int main1 (int x, int y) { for (i = 0; i < N; i++) { - p->a[i] = a[i]; - p->b[i] = b[i]; - if (foo == 135) - abort (); /* to avoid vectorization */ + p->a[i] = a[i]; + p->b[i] = b[i]; + asm volatile ("" ::: "memory"); } /* Vectorizable: distance > VF. */ for (i = 0; i < N; i++) - { - *((int *)p + x + i) = *((int *)p + x + i + 8); - } + *((int *)p + x + i) = *((int *)p + x + i + NINTS); /* check results: */ - for (i = 0; i < N; i++) - { - if (p->a[i] != c[i]) - abort(); - } + if (p->a[0] != a[N - 1]) + abort (); + for (i = 1; i < N; i++) + if (p->a[i] != b[i - 1]) + abort (); + return 0; } @@ -50,7 +52,6 @@ int main (void) { check_vect (); - foo = 0; return main1 (0, N); } diff --git a/gcc/testsuite/gcc.dg/vect/vect-104.c b/gcc/testsuite/gcc.dg/vect/vect-104.c index c7478382915..a77c98735eb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-104.c +++ b/gcc/testsuite/gcc.dg/vect/vect-104.c @@ -16,7 +16,6 @@ struct extraction static int a[N][N] = {{1,2,3},{4,5,6},{7,8,9}}; static int b[N][N] = {{17,24,7},{0,2,3},{4,31,82}}; static int c[N][N] = {{1,2,3},{4,5,5},{5,5,5}}; -volatile int foo; __attribute__ ((noinline)) int main1 (int x) { @@ -30,8 +29,7 @@ int main1 (int x) { { p->a[i][j] = a[i][j]; p->b[i][j] = b[i][j]; - if (foo == 135) - abort (); /* to avoid vectorization */ + asm volatile ("" ::: "memory"); } 
} @@ -60,11 +58,10 @@ int main (void) { check_vect (); - foo = 0; return main1 (N); } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { target { ! vect_multiple_sizes } } } } */ -/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump "possible dependence between data-refs" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-105-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-105-big-array.c index 8b483e0c01d..433565bfd4d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-105-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-105-big-array.c @@ -16,8 +16,6 @@ static int a[N][N]; static int b[N][N]; static int c[N][N]; -volatile int y; - __attribute__ ((noinline)) int main1 (int x) { int i,j, off; @@ -29,8 +27,7 @@ int main1 (int x) { { a[i][j] = (i*7 + j*17)%53; b[i][j] = (i*11+ j*13)%41; - if (y) - abort (); /* to avoid vectorization. */ + asm volatile ("" ::: "memory"); } } for (i = 0; i < N; i++) @@ -38,8 +35,7 @@ int main1 (int x) { for (j = 0; j < N; j++) { c[i][j] = a[i][j]; - if (y) - abort (); /* to avoid vectorization. */ + asm volatile ("" ::: "memory"); } } for (i = 1; i < N; i++) @@ -53,8 +49,7 @@ int main1 (int x) { *(&c[0][0]+x+i+j) = *(&b[0][0] + off - N*N); else *(&c[0][0]+x+i+j) = *(&a[0][0] + off); - if (y) - abort (); /* to avoid vectorization. */ + asm volatile ("" ::: "memory"); } } @@ -64,10 +59,7 @@ int main1 (int x) { { p->a[i][j] = a[i][j]; p->b[i][j] = b[i][j]; - /* Because Y is volatile, the compiler cannot move this check out - of the loop. */ - if (y) - abort (); /* to avoid vectorization. 
*/ + asm volatile ("" ::: "memory"); } } diff --git a/gcc/testsuite/gcc.dg/vect/vect-105.c b/gcc/testsuite/gcc.dg/vect/vect-105.c index e5483b33b94..0024457f9e2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-105.c +++ b/gcc/testsuite/gcc.dg/vect/vect-105.c @@ -16,8 +16,6 @@ static int a[N][N] = {{1,2,3,11},{4,5,6,12},{7,8,9,13},{34,45,67,83}}; static int b[N][N] = {{17,28,15,23},{0,2,3,24},{4,31,82,25},{29,31,432,256}}; static int c[N][N] = {{1,2,3,11},{4,9,13,34},{45,67,83,13},{34,45,67,83}}; -volatile int y; - __attribute__ ((noinline)) int main1 (int x) { int i,j; @@ -30,10 +28,7 @@ int main1 (int x) { { p->a[i][j] = a[i][j]; p->b[i][j] = b[i][j]; - /* Because Y is volatile, the compiler cannot move this check out - of the loop. */ - if (y) - abort (); /* to avoid vectorization */ + asm volatile ("" ::: "memory"); } } diff --git a/gcc/testsuite/gcc.dg/vect/vect-109.c b/gcc/testsuite/gcc.dg/vect/vect-109.c index cf54c5f1116..9a507105899 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-109.c +++ b/gcc/testsuite/gcc.dg/vect/vect-109.c @@ -76,5 +76,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_element_align } } } */ /* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 2 "vect" { xfail vect_element_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_element_align xfail { ! 
vect_unaligned_possible } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-112-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-112-big-array.c index 54aef699e7c..a99a590d9ac 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-112-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-112-big-array.c @@ -8,8 +8,6 @@ char cb[N]; char cc[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (void) { @@ -20,9 +18,7 @@ main1 (void) cb[i] = i + 2; cc[i] = i + 1; check_diff += (cb[i] - cc[i]); - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Cross-iteration cycle. */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-114.c b/gcc/testsuite/gcc.dg/vect/vect-114.c index 929c8045d32..557b44110a0 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-114.c +++ b/gcc/testsuite/gcc.dg/vect/vect-114.c @@ -34,6 +34,9 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_perm } } } } } */ +/* Requires reverse for SVE, which is implemented by a later patch. + Until then we fall back to Advanced SIMD and successfully vectorize + the loop. */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! 
vect_perm } xfail { aarch64_sve && vect_variable_length } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-117.c b/gcc/testsuite/gcc.dg/vect/vect-117.c index bb1aebcf03f..22f8e011872 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-117.c +++ b/gcc/testsuite/gcc.dg/vect/vect-117.c @@ -17,8 +17,6 @@ static int c[N][N] = {{ 1, 2, 3, 4, 5}, {34,38,42,46,50}, {55,60,65,70,75}}; -volatile int foo; - __attribute__ ((noinline)) int main1 (int A[N][N], int n) { @@ -43,7 +41,6 @@ int main (void) check_vect (); - foo = 0; main1 (a, N); /* check results: */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-15-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-15-big-array.c index d9457c3273a..5313eae598b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-15-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-15-big-array.c @@ -5,8 +5,6 @@ #define N 128 -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -17,8 +15,7 @@ int main1 () for (i = 0; i <N; i++) { b[i] = i*3; - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Not vectorizable yet (reverse access and forward access). */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-2-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-2-big-array.c index 04ba58d21e3..162cb54b58d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-2-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-2-big-array.c @@ -9,8 +9,6 @@ char cb[N]; char ca[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -19,9 +17,7 @@ int main1 () for (i = 0; i < N; i++) { cb[i] = i*3; - /* To avoid vectorization. 
*/ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-23.c b/gcc/testsuite/gcc.dg/vect/vect-23.c index 45496f390e6..69e0848c8ec 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-23.c +++ b/gcc/testsuite/gcc.dg/vect/vect-23.c @@ -125,4 +125,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-24.c b/gcc/testsuite/gcc.dg/vect/vect-24.c index 0511f7b0b9c..3abf7c8453b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-24.c +++ b/gcc/testsuite/gcc.dg/vect/vect-24.c @@ -123,4 +123,4 @@ int main (void) return main1 (); } /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail { { ! aarch64*-*-* } && { ! arm-*-* } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-25.c b/gcc/testsuite/gcc.dg/vect/vect-25.c index fa1a681538a..904eea8a17b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-25.c +++ b/gcc/testsuite/gcc.dg/vect/vect-25.c @@ -51,4 +51,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! 
vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c index c13dbd2bb55..4f0472b5d0f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-26.c +++ b/gcc/testsuite/gcc.dg/vect/vect-26.c @@ -36,5 +36,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_element_align_preferred || { ! vect_align_stack_vars } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c index 1a10f27c5fd..590217feee7 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-27.c +++ b/gcc/testsuite/gcc.dg/vect/vect-27.c @@ -46,5 +46,5 @@ int main (void) /* The initialization induction loop (with aligned access) is also vectorized. */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! 
vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c index d38ad7b0f96..86ec2cc1ddf 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-29.c +++ b/gcc/testsuite/gcc.dg/vect/vect-29.c @@ -51,6 +51,6 @@ int main (void) /* The initialization induction loop (with aligned access) is also vectorized. */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" {target { vect_no_align && { ! vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-32-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-32-big-array.c index 8863b58536a..3e1403bbe96 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-32-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-32-big-array.c @@ -36,4 +36,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! 
vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-32.c b/gcc/testsuite/gcc.dg/vect/vect-32.c index 6fe5099ac1a..2684cf2e0d3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-32.c +++ b/gcc/testsuite/gcc.dg/vect/vect-32.c @@ -3,8 +3,12 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS / 8) +#else #define N 16 - +#endif + __attribute__ ((noinline)) int main1 () { @@ -36,4 +40,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-33.c b/gcc/testsuite/gcc.dg/vect/vect-33.c index 9964c492a67..e215052ff77 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-33.c +++ b/gcc/testsuite/gcc.dg/vect/vect-33.c @@ -37,6 +37,6 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump "Vectorizing an unaligned access" "vect" { target { { { ! powerpc*-*-* } && vect_hw_misalign } && { { ! vect64 } || vect_multiple_sizes } } } } } */ +/* { dg-final { scan-tree-dump "Vectorizing an unaligned access" "vect" { target { { { ! powerpc*-*-* } && vect_hw_misalign } && { { ! vect64 } || vect_multiple_sizes } } xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" { target { vector_alignment_reachable && { vect64 && {! vect_multiple_sizes} } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { {! vector_alignment_reachable} || {! vect64} } && {! 
vect_hw_misalign} } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-34-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-34-big-array.c index 2991f76fca5..0aa6d507a82 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-34-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-34-big-array.c @@ -10,8 +10,6 @@ struct { } s; char cb[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -20,9 +18,7 @@ int main1 () for (i = 0; i < N; i++) { cb[i] = i*3; - /* To avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-40.c b/gcc/testsuite/gcc.dg/vect/vect-40.c index 88c2eb11ddc..c74703268f9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-40.c +++ b/gcc/testsuite/gcc.dg/vect/vect-40.c @@ -59,4 +59,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"} } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c index d7d81f2c7c7..a65b4a62276 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-42.c +++ b/gcc/testsuite/gcc.dg/vect/vect-42.c @@ -67,5 +67,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { ! vector_alignment_reachable } && { ! vect_element_align } } } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { { ! 
vector_alignment_reachable } || vect_element_align } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_element_align xfail { ! { vect_unaligned_possible && vect_align_stack_vars } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { { ! vector_alignment_reachable } || vect_element_align } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c index f4efb17c0eb..03ef2c0f671 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-44.c +++ b/gcc/testsuite/gcc.dg/vect/vect-44.c @@ -65,7 +65,7 @@ int main (void) two loads to be aligned). */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || {! vector_alignment_reachable} } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { {! vector_alignment_reachable} && {{! vect_no_align} && {! 
vect_hw_misalign} } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-46.c b/gcc/testsuite/gcc.dg/vect/vect-46.c index 95a63c54431..185ac1424f9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-46.c +++ b/gcc/testsuite/gcc.dg/vect/vect-46.c @@ -61,4 +61,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c index ed60cdd77dc..bac6ef6b8dd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-48.c +++ b/gcc/testsuite/gcc.dg/vect/vect-48.c @@ -55,6 +55,7 @@ int main (void) (The store is aligned). */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target vect_align_stack_vars xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c index cadbcbb9015..c9500ca91e5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-50.c +++ b/gcc/testsuite/gcc.dg/vect/vect-50.c @@ -61,8 +61,7 @@ int main (void) align the store will not force the two loads to be aligned). 
*/ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_hw_misalign } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || {! vector_alignment_reachable} } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 3 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { {! vector_alignment_reachable} && { {! vect_no_align } && {! vect_hw_misalign } } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c index e37e0fa64c6..0343d9a24d1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-52.c @@ -56,6 +56,7 @@ int main (void) (The store is aligned). */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target vect_align_stack_vars xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 
2 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c index e21792df9c0..58201abe069 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-54.c +++ b/gcc/testsuite/gcc.dg/vect/vect-54.c @@ -60,5 +60,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c index 6a650f7c4ee..8060b05e781 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-56.c +++ b/gcc/testsuite/gcc.dg/vect/vect-56.c @@ -68,7 +68,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_element_align } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_element_align } xfail { ! 
vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_element_align } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { { ! vect_element_align } || vect_element_align_preferred} } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_element_align && { ! vect_element_align_preferred } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c index a243ca9edf9..441af51860e 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-58.c +++ b/gcc/testsuite/gcc.dg/vect/vect-58.c @@ -59,4 +59,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-6-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-6-big-array.c index 2818b7ec1fd..c5de86b167a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-6-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-6-big-array.c @@ -12,8 +12,6 @@ float e[N] = {0}; float b[N]; float c[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -25,17 +23,13 @@ int main1 () c[i] = i; results1[i] = 0; results2[i] = 0; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i=0; i<N/2; i++) { results1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i]; results2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i]; - /* Avoid vectorization. 
*/ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N/2; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c index 980c8f67801..3b7477c96ab 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-60.c +++ b/gcc/testsuite/gcc.dg/vect/vect-60.c @@ -69,7 +69,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_element_align } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_element_align } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_element_align } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { { ! vect_element_align } || vect_element_align_preferred } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_element_align && { ! 
vect_element_align_preferred } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-62.c b/gcc/testsuite/gcc.dg/vect/vect-62.c index f11301c84ad..abd3d700668 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-62.c +++ b/gcc/testsuite/gcc.dg/vect/vect-62.c @@ -65,4 +65,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-67.c b/gcc/testsuite/gcc.dg/vect/vect-67.c index 0af0b114523..12183a233c2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-67.c +++ b/gcc/testsuite/gcc.dg/vect/vect-67.c @@ -3,13 +3,19 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 16 +#if VECTOR_BITS > 256 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 8 +#endif + +#define N (NINTS * 2) __attribute__ ((noinline)) int main1 (int a, int b) { int i, j; - int ia[N][4][N+8]; + int ia[N][4][N + NINTS]; /* Multidimensional array. Aligned. The "inner" dimensions are invariant in the inner loop. Store. @@ -18,7 +24,7 @@ int main1 (int a, int b) { for (j = 0; j < N; j++) { - ia[i][1][j+8] = (a == b); + ia[i][1][j + NINTS] = (a == b); } } @@ -27,7 +33,7 @@ int main1 (int a, int b) { for (j = 0; j < N; j++) { - if (ia[i][1][j+8] != (a == b)) + if (ia[i][1][j + NINTS] != (a == b)) abort(); } } @@ -43,4 +49,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! 
vect_align_stack_vars } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-70.c b/gcc/testsuite/gcc.dg/vect/vect-70.c index a110f9c34e8..793dbfb7481 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-70.c +++ b/gcc/testsuite/gcc.dg/vect/vect-70.c @@ -4,11 +4,24 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 24 +#if VECTOR_BITS > 128 +#define NINTS (VECTOR_BITS / 32) +#else +#define NINTS 4 +#endif + +#define N (NINTS * 6) + +/* Keep execution time down. */ +#if N <= 24 +#define OUTERN N +#else +#define OUTERN NINTS +#endif struct s{ int m; - int n[N/6][N/6][N]; + int n[4][4][N]; }; struct test1{ @@ -18,36 +31,43 @@ struct test1{ struct s e[N]; /* array e.n is aligned */ }; +/* Avoid big local temporaries. */ +#if NINTS > 8 +struct test1 tmp1; +#endif + __attribute__ ((noinline)) int main1 () { int i,j; +#if NINTS <= 8 struct test1 tmp1; +#endif - for (i = 0; i < N; i++) - for (j = 3; j < N-3; j++) + for (i = 0; i < OUTERN; i++) + for (j = NINTS - 1; j < N - NINTS + 1; j++) { tmp1.e[i].n[1][2][j] = 8; } /* check results: */ - for (i = 0; i < N; i++) - for (j = 3; j < N-3; j++) + for (i = 0; i < OUTERN; i++) + for (j = NINTS - 1; j < N - NINTS + 1; j++) { if (tmp1.e[i].n[1][2][j] != 8) abort (); } /* not consecutive, will use strided stores */ - for (i = 0; i < N; i++) - for (j = 3; j < N-3; j++) + for (i = 0; i < OUTERN; i++) + for (j = NINTS - 1; j < N - NINTS + 1; j++) { tmp1.e[j].n[1][2][j] = 8; } /* check results: */ - for (i = 0; i < N; i++) - for (j = 3; j < N-3; j++) + for (i = 0; i < OUTERN; i++) + for (j = NINTS - 1; j < N - NINTS + 1; j++) { if (tmp1.e[j].n[1][2][j] != 8) abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c index 9c9be8eff13..472d8d57549 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-72.c +++ b/gcc/testsuite/gcc.dg/vect/vect-72.c @@ -48,5 +48,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-73-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-73-big-array.c index f5cc52e90af..1c9d1fdaf9a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-73-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-73-big-array.c @@ -11,8 +11,6 @@ int ib[N]; #define ia (ic+N) -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -21,8 +19,7 @@ int main1 () for (i = 0; i < N; i++) { ib[i] = i*3; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-74-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-74-big-array.c index 44f22e56b83..ba1ae63bd57 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-74-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-74-big-array.c @@ -13,8 +13,6 @@ float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float b[N+4] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0}; float c[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 7.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5}; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) { @@ -25,14 +23,12 @@ main1 (float *__restrict__ pa, float * __restrict__ pb, float * __restrict__ pc { b[i] = i; c[i] = 0.5 + i; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (; i < N+4; i++) { b[i] = i; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff 
--git a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c index 8844e15f268..42b2b8d91aa 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c @@ -12,8 +12,6 @@ int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17}; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (int *ib) { @@ -23,8 +21,7 @@ int main1 (int *ib) for (i = OFF; i < N+OFF; i++) { ib[i] = ib[i%OFF]*(i/OFF); - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) { @@ -53,4 +50,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_align_stack_vars xfail { ! 
vect_unaligned_possible } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c index e8c77ea98f4..2cdd7032242 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-75.c +++ b/gcc/testsuite/gcc.dg/vect/vect-75.c @@ -3,8 +3,13 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 32) +#define OFF (VECTOR_BITS / 32) +#else #define N 8 #define OFF 8 +#endif /* Check handling of accesses for which the "initial condition" - the expression that represents the first location accessed - is @@ -45,4 +50,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_align_stack_vars xfail { ! 
vect_unaligned_possible } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-76-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-76-big-array.c index c2a30accac5..5825cfc4464 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-76-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-76-big-array.c @@ -13,8 +13,6 @@ int ib[N+OFF] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0, 1, 3, 5, 7, 11, 13, 17}; int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17}; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (int *pib) { @@ -24,8 +22,7 @@ int main1 (int *pib) { ib[i] = ib[i%8]*(i/8); ic[i] = ic[i%8]*(i/8); - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = OFF; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c index 535fa16a234..56ee797d10b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c +++ b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c @@ -3,8 +3,13 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 32) +#define OFF (VECTOR_BITS / 32) +#else #define N 8 #define OFF 8 +#endif /* Check handling of accesses for which the "initial condition" - the expression that represents the first location accessed - is @@ -49,7 +54,8 @@ int main (void) both for the load and the store. */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_align_stack_vars xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { {! 
vect_no_align} && { unaligned_stack && vector_alignment_reachable } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { { {! unaligned_stack} && { vect_no_align && { ! vect_hw_misalign } } } || {unaligned_stack && { {! vector_alignment_reachable} && {! vect_no_align } } } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target { { unaligned_stack && { vector_alignment_reachable && vect_no_align } } || {unaligned_stack && { {! vector_alignment_reachable} && vect_no_align } } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-global.c b/gcc/testsuite/gcc.dg/vect/vect-77-global.c index fa06ad01304..f0b73505d68 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-77-global.c +++ b/gcc/testsuite/gcc.dg/vect/vect-77-global.c @@ -48,6 +48,6 @@ int main (void) /* Requires versioning for aliasing. */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { vect_no_align && { ! 
vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c index 1386a9da72f..c3ef8a36591 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c +++ b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c @@ -3,8 +3,13 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 32) +#define OFF (VECTOR_BITS / 32) +#else #define N 8 #define OFF 8 +#endif /* Check handling of accesses for which the "initial condition" - the expression that represents the first location accessed - is @@ -50,7 +55,8 @@ int main (void) both for the load and the store. */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_align_stack_vars xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { ! vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { {! vect_no_align} && { unaligned_stack && vector_alignment_reachable } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { { {! unaligned_stack} && { vect_no_align && { ! vect_hw_misalign } } } || {unaligned_stack && { {! vector_alignment_reachable} && { ! vect_no_align } } } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 2 "vect" { target { { unaligned_stack && { vector_alignment_reachable && vect_no_align } } || {unaligned_stack && { {! 
vector_alignment_reachable} && vect_no_align } } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-global.c b/gcc/testsuite/gcc.dg/vect/vect-78-global.c index 2a28580c98e..241e7fa94b5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-78-global.c +++ b/gcc/testsuite/gcc.dg/vect/vect-78-global.c @@ -48,6 +48,6 @@ int main (void) (The store is aligned). */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-80-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-80-big-array.c index 7ab9ada6a03..0baf4d2859b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-80-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-80-big-array.c @@ -13,8 +13,6 @@ float fc[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); the expression that represents the first location accessed - is more involved than just an ssa_name. 
*/ -volatile int y = 0; - __attribute__ ((noinline)) int main1 (float * __restrict__ pa, float * __restrict__ pb, float *__restrict__ pc) { @@ -24,14 +22,12 @@ main1 (float * __restrict__ pa, float * __restrict__ pb, float *__restrict__ pc) { fb[i] = i; fc[i] = 0.5+i; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (; i < N+4; i++) { fb[i] = i; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c index 42355ff7281..decfbee318a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c @@ -45,5 +45,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_element_align_preferred || { ! 
vect_align_stack_vars } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-89.c b/gcc/testsuite/gcc.dg/vect/vect-89.c index f634751c89d..051698eada2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-89.c +++ b/gcc/testsuite/gcc.dg/vect/vect-89.c @@ -3,7 +3,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 256 +#define N (VECTOR_BITS * 2 / 32) +#else #define N 16 +#endif struct tmp_struct { @@ -45,5 +49,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" { xfail { ! vect_align_stack_vars } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_element_align_preferred || { ! vect_align_stack_vars } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-91.c b/gcc/testsuite/gcc.dg/vect/vect-91.c index 979b8e93aef..9430da3290a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-91.c +++ b/gcc/testsuite/gcc.dg/vect/vect-91.c @@ -7,7 +7,14 @@ #define N 256 -extern int a[N+20]; +/* Pick a value greater than the vector length. */ +#if VECTOR_BITS > 128 +#define OFF (VECTOR_BITS * 5 / 32) +#else +#define OFF 20 +#endif + +extern int a[N + OFF]; /* The alignment of 'pa' is unknown. 
Yet we do know that both the read access and write access have @@ -52,7 +59,7 @@ main3 () for (i = 0; i < N; i++) { - a[i] = a[i+20]; + a[i] = a[i + OFF]; } return 0; diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c index 19283d61517..b9a1ce23d02 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-92.c +++ b/gcc/testsuite/gcc.dg/vect/vect-92.c @@ -17,12 +17,18 @@ float pc[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,1,2,3,4,5, can use this information (generate prolog and epilog loops with known number of iterations, and only if needed). */ +#if VECTOR_BITS > 128 +#define NITER (VECTOR_BITS * 3 / 32) +#else +#define NITER 12 +#endif + __attribute__ ((noinline)) int main1 () { int i; - for (i = 0; i < 10; i++) + for (i = 0; i < NITER - 2; i++) { pa[i+1] = pb[i+1] * pc[i+1]; } @@ -42,7 +48,7 @@ main2 () { int i; - for (i = 0; i < 12; i++) + for (i = 0; i < NITER; i++) { pa[i+1] = pb[i+1] * pc[i+1]; } @@ -92,4 +98,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-93.c b/gcc/testsuite/gcc.dg/vect/vect-93.c index dfa4d42b8b2..397c2ed05aa 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-93.c +++ b/gcc/testsuite/gcc.dg/vect/vect-93.c @@ -4,33 +4,38 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 3001 +#define N1 3001 +#if VECTOR_BITS > 256 +#define N2 (VECTOR_BITS / 32 + 2) +#else +#define N2 10 +#endif __attribute__ ((noinline)) int main1 (float *pa) { int i; - for (i = 0; i < 3001; i++) + for (i = 0; i < N1; i++) { pa[i] = 2.0; } /* check results: */ - for (i = 0; i < 3001; i++) + for (i = 0; i < N1; i++) { if (pa[i] != 
2.0) abort (); } - for (i = 1; i <= 10; i++) + for (i = 1; i <= N2; i++) { pa[i] = 3.0; } /* check results: */ - for (i = 1; i <= 10; i++) + for (i = 1; i <= N2; i++) { if (pa[i] != 3.0) abort (); @@ -42,13 +47,14 @@ main1 (float *pa) int main (void) { int i; - float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); - float b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float a[N1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + float b[N1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); check_vect (); /* from bzip2: */ - for (i=0; i<N; i++) b[i] = i; + for (i = 0; i < N1; i++) + b[i] = i; a[0] = 0; for (i = 1; i <= 256; i++) a[i] = b[i-1]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-95.c b/gcc/testsuite/gcc.dg/vect/vect-95.c index c27c34bcd86..60f2cf4ad8c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-95.c +++ b/gcc/testsuite/gcc.dg/vect/vect-95.c @@ -56,7 +56,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail {vect_element_align} } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_element_align && { ! aarch64*-*-* } } } } } */ /* For targets that support unaligned loads we version for the two unaligned stores and generate misaligned accesses for the loads. For targets that diff --git a/gcc/testsuite/gcc.dg/vect/vect-96.c b/gcc/testsuite/gcc.dg/vect/vect-96.c index 03289543add..0cb935b9f16 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-96.c +++ b/gcc/testsuite/gcc.dg/vect/vect-96.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 256 +#define N (VECTOR_BITS * 2 / 32) +#else #define N 16 +#endif struct tmp { @@ -44,6 +48,7 @@ int main (void) For targets that don't support unaligned loads, version for the store. 
*/ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { {! vect_no_align} && vector_alignment_reachable } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { target { { {! vect_no_align} && vector_alignment_reachable } && { ! vect_align_stack_vars } } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { {! vect_no_align} && vector_alignment_reachable } && vect_align_stack_vars } xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align } || { { ! vector_alignment_reachable} || vect_element_align } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { { vect_no_align && { ! vect_hw_misalign } } || { {! vector_alignment_reachable} && {! 
vect_element_align} } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-97-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-97-big-array.c index 32100860334..977a9d57ed4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-97-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-97-big-array.c @@ -8,8 +8,6 @@ char x[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); char cb[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -21,8 +19,7 @@ int main1 () for (i = 0; i < N; i++) { cb[i] = i*3; - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Check that datarefs analysis can determine that the access via pointer diff --git a/gcc/testsuite/gcc.dg/vect/vect-98-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-98-big-array.c index 17d11ba24ca..61b749d4669 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-98-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-98-big-array.c @@ -9,8 +9,6 @@ a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11] + \ a[12]*b[12] + a[13]*b[13] + a[14]*b[14] + a[15]*b[15]) -volatile int y = 0; - __attribute__ ((noinline)) int main1 (int ia[][N]) { @@ -41,9 +39,7 @@ int main (void) for (j = 0; j < N; j++) { ia[i][j] = i + j + 1; - /* Avoid vectorization. 
*/ - if (y) - abort (); + asm volatile ("" ::: "memory"); } check_vect (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c new file mode 100644 index 00000000000..d4eea873ccb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ + +#define N 87 +#define M 6 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *a, int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + a[i * step + 0] = a[i * step + 0] + 1; \ + a[i * step + 1] = a[i * step + 1] + 2; \ + a[i * step + 2] = a[i * step + 2] + 4; \ + a[i * step + 3] = a[i * step + 3] + 8; \ + } \ + } \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *a, int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + a[i * step + 0] = a[i * step + 0] + 1; \ + a[i * step + 1] = a[i * step + 1] + 2; \ + a[i * step + 2] = a[i * step + 2] + 4; \ + a[i * step + 3] = a[i * step + 3] + 8; \ + asm volatile (""); \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int j = -M; j <= M; ++j) \ + { \ + TYPE a[N * M], b[N * M]; \ + for (int i = 0; i < N * M; ++i) \ + a[i] = b[i] = TEST_VALUE (i); \ + int offset = (j < 0 ? 
N * M - 4 : 0); \ + test_##TYPE (a + offset, j); \ + ref_##TYPE (b + offset, j); \ + if (__builtin_memcmp (a, b, sizeof (a)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c new file mode 100644 index 00000000000..601e17fd1dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ + +#define N 87 +#define M 6 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE1(I) ((I) * 5 / 2) +#define TEST_VALUE2(I) ((I) * 11 / 5) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + } \ + } \ + \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + asm volatile (""); \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int j = -M; j <= M; ++j) \ + { \ + TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \ + for (int i = 0; i < N * M; ++i) \ + a1[i] = a2[i] = TEST_VALUE1 (i); \ + for (int i = 0; i < N; ++i) \ + b1[i] = b2[i] = TEST_VALUE2 (i); \ + int offset = (j < 0 ? 
N * M - 4 : 0); \ + test_##TYPE (a1 + offset, b1, j); \ + ref_##TYPE (a2 + offset, b2, j); \ + if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \ + __builtin_abort (); \ + if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-2, 2\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-3, 3\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-4, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]*\) >= 4} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-4, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-6, 6\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-8, 8\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-8, 8\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-12, 12\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-16, 16\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 4[)]* >= 16} "vect" { target 
{ vect_int || vect_float } } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-16, 16\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c new file mode 100644 index 00000000000..a44c9bb5258 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ + +#define N 87 +#define M 7 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE1(I) ((I) * 5 / 2) +#define TEST_VALUE2(I) ((I) * 11 / 5) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + step = step & M; \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + } \ + } \ + \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (unsigned short i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + asm volatile (""); \ + } \ + 
} + +#define DO_TEST(TYPE) \ + for (int j = 0; j <= M; ++j) \ + { \ + TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \ + for (int i = 0; i < N * M; ++i) \ + a1[i] = a2[i] = TEST_VALUE1 (i); \ + for (int i = 0; i < N; ++i) \ + b1[i] = b2[i] = TEST_VALUE2 (i); \ + test_##TYPE (a1, b1, j); \ + ref_##TYPE (a2, b2, j); \ + if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \ + __builtin_abort (); \ + if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 2\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 3\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]*\) >= 4} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 6\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 8\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 8\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 12\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { 
scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 16\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 4[)]* >= 16} "vect" { target { vect_int || vect_float } }} } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 16\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-5.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-5.c index bfa946b9ad2..e17c7150a06 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-5.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-5.c @@ -15,5 +15,5 @@ f1 (struct s *a, struct s *b) } /* { dg-final { scan-tree-dump-times "consider run-time aliasing" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "improved number of alias checks from 1 to 0" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "improved number of alias checks from 1 to 0" 1 "vect" { xfail vect_variable_length } } } */ /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c new file mode 100644 index 00000000000..5aeaf2173dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#define N 200 +#define DIST 32 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; 
+typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + TYPE a_##TYPE[N * 2]; \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (int x, int y) \ + { \ + for (int i = 0; i < N; ++i) \ + a_##TYPE[i + x] += a_##TYPE[i + y]; \ + } + +#define DO_TEST(TYPE) \ + for (int i = 0; i < DIST * 2; ++i) \ + { \ + for (int j = 0; j < N + DIST * 2; ++j) \ + a_##TYPE[j] = TEST_VALUE (j); \ + test_##TYPE (i, DIST); \ + for (int j = 0; j < N + DIST * 2; ++j) \ + { \ + TYPE expected; \ + if (j < i || j >= i + N) \ + expected = TEST_VALUE (j); \ + else if (i <= DIST) \ + expected = ((TYPE) TEST_VALUE (j) \ + + (TYPE) TEST_VALUE (j - i + DIST)); \ + else \ + expected = ((TYPE) TEST_VALUE (j) \ + + a_##TYPE[j - i + DIST]); \ + if (expected != a_##TYPE[j]) \ + __builtin_abort (); \ + } \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c new file mode 100644 index 00000000000..9bc38af3692 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#define N 200 +#define M 4 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *a, TYPE *b) \ + { \ + for (int i = 0; i < N; i += 2) \ + { \ + a[i + 0] = b[i + 0] + 2; \ + a[i + 1] = b[i + 1] + 3; \ + } \ + } + +#define 
DO_TEST(TYPE) \ + for (int j = 1; j < M; ++j) \ + { \ + TYPE a[N + M]; \ + for (int i = 0; i < N + M; ++i) \ + a[i] = TEST_VALUE (i); \ + test_##TYPE (a + j, a); \ + for (int i = 0; i < N; i += 2) \ + if (a[i + j] != (TYPE) (a[i] + 2) \ + || a[i + j + 1] != (TYPE) (a[i + 1] + 3)) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-all-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-all-big-array.c index 4826cdb93e8..6eb9533a8bb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-all-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-all-big-array.c @@ -78,8 +78,6 @@ char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; char ca[N]; short sa[N]; -volatile int y = 0; - /* All of the loops below are currently vectorizable, except initialization ones. */ @@ -101,8 +99,7 @@ main1 () fmul_results[i] = b[i] * c[i]; fresults1[i] = 0; fresults2[i] = 0; - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Test 1: copy chars. */ @@ -142,15 +139,13 @@ main1 () { fresults1[i] = a[i]; fresults2[i] = e[i]; - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N/2; i++) { fresults1[i] = b[i+N/2] * c[i+N/2] - b[i] * c[i]; fresults2[i+N/2] = b[i] * c[i+N/2] + b[i+N/2] * c[i]; - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* Test 4: access with offset. */ for (i = 0; i < N/2; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c index ce7c6b6d862..3c98b07e425 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c @@ -4,8 +4,6 @@ #define N 128 -volatile int y = 0; - static inline void vfoo16 (unsigned short int* a) { @@ -27,8 +25,7 @@ main (void) { arr[i] = i; expect[i] = __builtin_bswap16 (i); - if (y) /* Avoid vectorisation. 
*/ - abort (); + asm volatile ("" ::: "memory"); } vfoo16 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c index 7f3a915ee97..88d88b5f034 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c @@ -4,8 +4,6 @@ #define N 128 -volatile int y = 0; - static inline void vfoo32 (unsigned int* a) { @@ -27,8 +25,7 @@ main (void) { arr[i] = i; expect[i] = __builtin_bswap32 (i); - if (y) /* Avoid vectorisation. */ - abort (); + asm volatile ("" ::: "memory"); } vfoo32 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c index b9e421d1de4..fd15d713c5d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c @@ -4,8 +4,6 @@ #define N 128 -volatile int y = 0; - static inline void vfoo64 (unsigned long long* a) { @@ -27,8 +25,7 @@ main (void) { arr[i] = i; expect[i] = __builtin_bswap64 (i); - if (y) /* Avoid vectorisation. */ - abort (); + asm volatile ("" ::: "memory"); } vfoo64 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c b/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c index 2b010132984..f1fb8d99b0f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c @@ -38,7 +38,7 @@ foo () } } - /* Not vectorizable. */ + /* Only vectorizable with masked stores. */ for (i = 0; i < N; i++) { c = in1[i].b; @@ -82,4 +82,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! vect_strided2 } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_masked_store } xfail { { vect_no_align && { ! vect_hw_misalign } } || { ! 
vect_strided2 } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect_masked_store } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-depend-1.c b/gcc/testsuite/gcc.dg/vect/vect-depend-1.c new file mode 100644 index 00000000000..d155f3ede05 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-depend-1.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_int } */ + +#define N 100 + +void __attribute__ ((noinline, noclone)) +foo (int *a, int *b, int *c) +{ + for (int i = 0; i < N; ++i) + { + a[i * 2] = i; + a[i * 2 + 1] = i + 100; + b[i * 2] += 1; + b[i * 2 + 1] += 2; + c[i] = a[i * 2] + a[i * 2 + 1]; + } +} + +int +main (void) +{ + int a[N * 2], b[N * 2], c[N]; + + for (int i = 0; i < N; ++i) + { + a[i * 2] = 44; + a[i * 2 + 1] = 66; + b[i * 2] = i * 5; + b[i * 2 + 1] = i * 7; + c[i] = 77; + asm volatile (""); + } + foo (a, b, c); + for (int i = 0; i < N; ++i) + { + if (a[i * 2] != i + || a[i * 2 + 1] != i + 100 + || b[i * 2] != i * 5 + 1 + || b[i * 2 + 1] != i * 7 + 2 + || c[i] != i * 2 + 100) + __builtin_abort (); + asm volatile (""); + } + + for (int i = 0; i < N; ++i) + { + a[i * 2] = 44; + a[i * 2 + 1] = 66; + asm volatile (""); + } + foo (a, a, c); + for (int i = 0; i < N; ++i) + { + if (a[i * 2] != i + 1 + || a[i * 2 + 1] != i + 102 + || c[i] != i * 2 + 103) + __builtin_abort (); + asm volatile (""); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6-big-array.c index ce134b08a40..1d9dcdab5e9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6-big-array.c @@ -9,8 +9,6 @@ int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); int out[K]; int check_result[K]; -volatile int y = 0; - __attribute__ ((noinline)) void foo () { @@ -24,9 +22,7 @@ foo () for 
(i = 0; i < K; i++) { sum *= in[i+k][j]; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } check_result[k] = sum; } diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c index 8a20e721061..62c2b83b097 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c @@ -69,5 +69,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-1.c b/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-1.c index e5dba82d7fa..ee34eea0c3c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-1.c @@ -37,5 +37,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" { target aarch64*-*-* xfail aarch64_sve } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-2.c b/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-2.c index c5beabaa974..fbd6c9065f3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-2.c @@ -36,5 +36,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: 
detected" 2 "vect" { target aarch64*-*-* xfail aarch64_sve } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c index 1afdb463d50..378a5fe642a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c @@ -4,7 +4,14 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 32 +#if VECTOR_BITS > 128 +#define NSHORTS (VECTOR_BITS / 16) +#else +#define NSHORTS 8 +#endif + +#define NINTS (NSHORTS / 2) +#define N (NSHORTS * 4) short sa[N]; short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, @@ -27,14 +34,14 @@ __attribute__ ((noinline)) int main1 (int n) copmutations. Vectorizable. */ for (i = 0; i < n; i++) { - sa[i+7] = sb[i]; - ia[i+3] = ib[i+1]; + sa[i + NSHORTS - 1] = sb[i]; + ia[i + NINTS - 1] = ib[i + 1]; } /* check results: */ for (i = 0; i < n; i++) { - if (sa[i+7] != sb[i] || ia[i+3] != ib[i+1]) + if (sa[i + NSHORTS - 1] != sb[i] || ia[i + NINTS - 1] != ib[i + 1]) abort (); } @@ -57,14 +64,14 @@ __attribute__ ((noinline)) int main2 (int n) copmutations. */ for (i = 0; i < n; i++) { - ia[i+3] = ib[i]; - sa[i+3] = sb[i+1]; + ia[i + NINTS - 1] = ib[i]; + sa[i + NINTS - 1] = sb[i + 1]; } /* check results: */ for (i = 0; i < n; i++) { - if (sa[i+3] != sb[i+1] || ia[i+3] != ib[i]) + if (sa[i + NINTS - 1] != sb[i + 1] || ia[i + NINTS - 1] != ib[i]) abort (); } @@ -75,13 +82,13 @@ int main (void) { check_vect (); - main1 (N-7); - main2 (N-3); + main1 (N - NSHORTS + 1); + main2 (N - NINTS + 1); return 0; } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {{ vect_no_align && { ! 
vect_hw_misalign } } || {vect_sizes_32B_16B }}} } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { { ! vect_unaligned_possible } || vect_sizes_32B_16B } } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_no_align && { { ! vect_hw_misalign } && vect_sizes_32B_16B } } }} } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c index 9e558b81b90..18bf5e80917 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 8) +#else #define N 32 +#endif int ib[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; @@ -55,5 +59,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { xfail { ! { vect_unaligned_possible && vect_align_stack_vars } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c index bd02e5aeee9..43887865bf4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c @@ -95,6 +95,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { vect_element_align} } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail { vect_no_align || vect_element_align } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_element_align } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { target { ! vect_element_align } xfail { ! vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_element_align } xfail { ! vect_unaligned_possible } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c index 930dc4c0c3d..b47a93ab326 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 8) +#else #define N 32 +#endif unsigned int ic[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; @@ -62,5 +66,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { sparc*-*-* && ilp32 } }} } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 6 "vect" { target { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 "vect" { xfail { ! 
{ vect_unaligned_possible && vect_align_stack_vars } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c index 4e13702621f..229ce987db5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c @@ -3,18 +3,19 @@ #include "tree-vect.h" int ii[32]; -char cc[66] __attribute__((aligned(1))) = +struct { char a; char b[66]; } cc = { 0, { 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, - 30, 0, 31, 0 }; + 30, 0, 31, 0 } +}; void __attribute__((noinline,noclone)) foo (int s) { int i; for (i = 0; i < s; i++) - ii[i] = (int) cc[i*2]; + ii[i] = (int) cc.b[i*2]; } int main (int argc, const char **argv) diff --git a/gcc/testsuite/gcc.dg/vect/vect-ooo-group-1.c b/gcc/testsuite/gcc.dg/vect/vect-ooo-group-1.c new file mode 100644 index 00000000000..416198354ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-ooo-group-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ + +void +f (int *restrict a, int *restrict b, int *restrict c) +{ + for (int i = 0; i < 100; ++i) + if (c[i]) + { + a[i * 2] = b[i * 5 + 2]; + a[i * 2 + 1] = b[i * 5]; + } +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-3a-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-outer-3a-big-array.c index b84f5afa0e2..fd841b182e3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-3a-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-3a-big-array.c @@ -49,4 +49,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "step doesn't divide the vector alignment" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "step doesn't divide the vector alignment" 1 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c b/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c index d3ba837c95d..d26440d1a64 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c @@ -49,4 +49,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "step doesn't divide the vector alignment" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "step doesn't divide the vector alignment" 1 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c index 01ef4d2b68f..3140829c73d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 8 / 16) +#else #define N 64 +#endif /* Modified rgb to rgb conversion from FFmpeg. */ __attribute__ ((noinline)) void diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c index 92404bf74ac..e419f203eca 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c @@ -4,7 +4,7 @@ #include <stdarg.h> #include "tree-vect.h" -#define N 128 +#define N VECTOR_BITS /* Modified rgb to rgb conversion from FFmpeg. 
*/ __attribute__ ((noinline)) void @@ -32,7 +32,9 @@ foo (unsigned char *src, unsigned char *dst) const int g = *s++; const int r = *s++; const int a = *s++; - if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9))) + unsigned short expected + = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)); + if (*d != expected) abort (); d++; } diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c index cc75646514d..9dd1ea553ea 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c @@ -4,7 +4,11 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 8 / 16) +#else #define N 64 +#endif /* Modified rgb to rgb conversion from FFmpeg. */ __attribute__ ((noinline)) int diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c index 0a4e732a08b..fae99ab0b08 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c @@ -48,5 +48,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } xfail { ! 
vect_unaligned_possible } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_element_align_preferred } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c index 8baee0d5657..d5c0cf10ce1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c +++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c @@ -4,12 +4,24 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define NINTS (VECTOR_BITS / 32) +#define EXTRA (NINTS * 2) +#else +#define NINTS 4 +#define EXTRA 10 +#endif + #define N 128 -#define RES 21640 -int ib[N+10]; -int ia[N+10]; -int ic[N+10]; +#define RES_A (N * N / 4) +#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1)) +#define RES_C (N * (N + 1) / 2 + (N + 1)) +#define RES (RES_A + RES_B + RES_C) + +int ib[N + EXTRA]; +int ia[N + EXTRA]; +int ic[N + EXTRA]; __attribute__ ((noinline)) int main1 () @@ -20,8 +32,8 @@ int main1 () for (i = 0; i <= N; i++) { suma += ia[i]; - sumb += ib[i+5]; - sumc += ic[i+1]; + sumb += ib[i + NINTS + 1]; + sumc += ic[i + 1]; } /* check results: */ @@ -37,7 +49,7 @@ int main (void) check_vect (); - for (i = 0; i < N+10; i++) + for (i = 0; i < N + EXTRA; i++) { asm volatile ("" : "+r" (i)); ib[i] = i; @@ -49,5 +61,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B } } } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || {vect_sizes_32B_16B } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { { ! 
vect_unaligned_possible } || vect_sizes_32B_16B } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { ! vect_unaligned_possible } || vect_sizes_32B_16B } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c index 937062ed6f2..88f9f0ddcba 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c @@ -46,5 +46,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1char-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1char-big-array.c index 660e0fdf5fb..e762f4890cc 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-1char-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1char-big-array.c @@ -9,8 +9,6 @@ unsigned char ub[N]; unsigned char uc[N]; unsigned char diff; -volatile int y = 0; - __attribute__ ((noinline)) void main1 (unsigned char x, unsigned char max_result, unsigned char min_result) { @@ -33,9 +31,7 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result) if (uc[i] < min_result) min_result = uc[i]; - /* Avoid vectorization. 
*/ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) { udiff += (unsigned char) (ub[i] - uc[i]); diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c index 8692f42ae34..e246ae7f3c6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c @@ -4,7 +4,6 @@ #include "tree-vect.h" #define N 256 -volatile int y = 0; __attribute__ ((noinline)) void main1 (signed char x, signed char max_result, signed char min_result) @@ -30,9 +29,7 @@ void main1 (signed char x, signed char max_result, signed char min_result) max_result = c[i]; if (c[i] < min_result) min_result = c[i]; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c index 1bbf10c7607..0f9fc20567e 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c @@ -50,4 +50,5 @@ int main (void) /* need -ffast-math to vectorizer these loops. */ /* ARM NEON passes -ffast-math to these tests, so expect this to fail. 
*/ -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_ieee_add_reduc || arm_neon_ok } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_ieee_add_reduc } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c index af448920aa4..ead9548c4a6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c @@ -1,31 +1,44 @@ -/* { dg-require-effective-target whole_vector_shift } */ +/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */ /* Write a reduction loop to be reduced using vector shifts. */ #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS / 8) +#else +#define N 16 +#endif + extern void abort(void); -unsigned char in[16] __attribute__((__aligned__(16))); +unsigned char in[N] __attribute__((__aligned__(16))); int main (unsigned char argc, char **argv) { unsigned char i = 0; unsigned char sum = 1; + unsigned char expected = 1; check_vect (); - for (i = 0; i < 16; i++) + for (i = 0; i < N; i++) in[i] = (i + i + 1) & 0xfd; + for (i = 0; i < N; i++) + { + expected |= in[i]; + asm volatile (""); + } + /* Prevent constant propagation of the entire loop below. */ asm volatile ("" : : : "memory"); - for (i = 0; i < 16; i++) + for (i = 0; i < N; i++) sum |= in[i]; - if (sum != 29) + if (sum != expected) { __builtin_printf("Failed %d\n", sum); abort(); @@ -34,5 +47,5 @@ main (unsigned char argc, char **argv) return 0; } -/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ - +/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! 
vect_logical_reduc } } } } */ +/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c index 71a70aef622..799ac173e21 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c @@ -1,27 +1,43 @@ -/* { dg-require-effective-target whole_vector_shift } */ +/* { dg-do run { target { whole_vector_shift || vect_logical_reduc } } } */ /* Write a reduction loop to be reduced using vector shifts and folded. */ #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS / 8) +#else +#define N 16 +#endif + extern void abort(void); int main (unsigned char argc, char **argv) { - unsigned char in[16] __attribute__((aligned(16))); + unsigned char in[N] __attribute__((aligned(16))); unsigned char i = 0; unsigned char sum = 1; + unsigned char expected = 1; check_vect (); - for (i = 0; i < 16; i++) + for (i = 0; i < N; i++) in[i] = (i + i + 1) & 0xfd; - for (i = 0; i < 16; i++) + for (i = 0; i < N; i++) + { + expected |= in[i]; + asm volatile (""); + } + + /* Prevent constant propagation of the entire loop below. */ + asm volatile ("" : : : "memory"); + + for (i = 0; i < N; i++) sum |= in[i]; - if (sum != 29) + if (sum != expected) { __builtin_printf("Failed %d\n", sum); abort(); @@ -30,5 +46,5 @@ main (unsigned char argc, char **argv) return 0; } -/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ - +/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target { ! 
vect_logical_reduc } } } } */ +/* { dg-final { scan-tree-dump "Reduce using direct vector reduction" "vect" { target vect_logical_reduc } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c index a8cb21b4eb4..b06b234072b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c @@ -3,7 +3,12 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 16) +#else #define N 16 +#endif + unsigned short udata_sh[N] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 }; #define SUM 210 diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c index 1863403092c..be03c7d011d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c @@ -7,8 +7,6 @@ unsigned char udata_ch[N]; #define SUM N*(N-1) -volatile int y = 0; - __attribute__ ((noinline)) int foo () { @@ -18,9 +16,7 @@ foo () for (i = 0; i < N; i++) { udata_ch[i] = i*2; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* widenning sum: sum chars into int. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b.c index 1deb147da58..96f8b740ced 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b.c @@ -3,7 +3,12 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 16) +#else #define N 16 +#endif + unsigned char udata_ch[N] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 }; #define SUM 210 diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c index 91dace25013..c30c85ce911 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c @@ -7,8 +7,6 @@ unsigned char udata_ch[N]; #define SUM N*(N-1) -volatile int y = 0; - __attribute__ ((noinline)) int foo () { @@ -18,9 +16,7 @@ foo () for (i = 0; i < N; i++) { udata_ch[i] = i*2; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* widenning sum: sum chars into short. 
*/ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c index 81384300088..a98edd3045a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c @@ -3,7 +3,12 @@ #include <stdarg.h> #include "tree-vect.h" +#if VECTOR_BITS > 128 +#define N (VECTOR_BITS * 2 / 16) +#else #define N 16 +#endif + signed short data_sh[N] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 }; #define SUM 210 diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c index ce333978253..570e56a8c9b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c @@ -6,8 +6,6 @@ #define N 128 signed char data_ch[N]; -volatile int y = 0; - __attribute__ ((noinline)) int foo () { @@ -19,9 +17,7 @@ foo () { data_ch[i] = i*2; check_intsum += data_ch[i]; - /* Avoid vectorization. */ - if (y) - abort (); + asm volatile ("" ::: "memory"); } /* widenning sum: sum chars into int. */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c index e7a006a8320..da47a824cb6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c @@ -15,8 +15,6 @@ typedef struct { unsigned int b; } ii; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -34,8 +32,7 @@ main1 () arr[i].b = i * 2; iarr[i].a = i; iarr[i].b = i * 3; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c index f1ce49f4725..d53b7669a6b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c @@ -10,8 +10,6 @@ typedef struct { unsigned short b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -24,8 +22,7 @@ main1 () { arr[i].a = i; arr[i].b = i * 2; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c index 0be68b31198..37ff3abe97d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c @@ -12,8 +12,6 @@ typedef struct { unsigned short d; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -28,8 +26,7 @@ main1 () arr[i].b = i * 2; arr[i].c = 17; arr[i].d = i+34; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c index 5f71bed3f5c..9237a9074de 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c @@ -10,8 +10,6 @@ typedef struct { unsigned short b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -28,8 +26,7 @@ main1 () arr[i].a = i; arr[i].b = i * 2; iarr[i] = i * 3; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c index 0be7f8b9b78..f64a1347350 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c @@ -10,8 +10,6 @@ typedef struct { unsigned char b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -24,8 +22,7 @@ main1 () { arr[i].a = i; arr[i].b = i * 2; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2-big-array.c index dd144fefc5c..2add5b48991 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2-big-array.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -47,8 +45,7 @@ main1 () check_res[i].h = arr[i].f; check_res[i].g = arr[i].f - arr[i].a; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c index 24c7cc3517a..2b7a1a4bb77 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -36,8 +34,7 @@ main1 () arr[i].f = i * 2 + 2; arr[i].g = i - 3; arr[i].h = 56; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7-big-array.c index 0132aaf4bc6..e487de8b4e7 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7-big-array.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -52,8 +50,7 @@ main1 () check_res[i].h = arr[i].d; check_res[i].g = u + t; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c index 6eb04d88a51..0f3347e8bb2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 () { @@ -37,8 +35,7 @@ main1 () arr[i].f = i * 5; arr[i].g = i - 3; arr[i].h = 67; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } for (i = 0; i < N; i++) diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c b/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c index 36735963e30..82727e595c1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c @@ -15,8 +15,6 @@ typedef struct { unsigned int b; } ii; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr, ii *iarr) { @@ -64,8 +62,7 @@ int main (void) arr[i].b = i * 2; iarr[i].a = i; iarr[i].b = i * 3; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr, iarr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c b/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c index 21477f34686..0fac6150116 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c @@ -15,8 +15,6 @@ typedef struct { unsigned int b; } ii; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr, ii *iarr) { @@ -64,8 +62,7 @@ int main (void) arr[i].b = i * 2; iarr[i].a = i; iarr[i].b = i * 3; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr, iarr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c b/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c index b8bb8784b74..8c560480bc4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c @@ -12,8 +12,6 @@ typedef struct { s buffer1[N], buffer2[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s * __restrict__ pIn, s* __restrict__ pOut) { @@ -63,8 +61,7 @@ int main (void) buffer1[i].b = i + 8; buffer2[i].a = i * 3; buffer2[i].b = i * 2; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } check_vect (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c index 5bc23697372..7d264f39c60 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c @@ -10,8 +10,6 @@ typedef struct { unsigned short b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -48,8 +46,7 @@ int main (void) { arr[i].a = i; arr[i].b = i * 2; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c index 90e58166669..ee8ea0d666d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c @@ -12,8 +12,6 @@ typedef struct { unsigned short d; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -61,8 +59,7 @@ int main (void) arr[i].b = i * 2; arr[i].c = 17; arr[i].d = i+34; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c index 4ddded37851..fe41dbd9cf4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c @@ -12,8 +12,6 @@ typedef struct { int d; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -56,8 +54,7 @@ int main (void) arr[i].b = i * 2; arr[i].c = 17; arr[i].d = i+34; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c index 4c2e30acb7f..a88c0f08456 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c @@ -16,8 +16,6 @@ typedef struct { int h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -70,8 +68,7 @@ int main (void) arr[i].f = i * 5; arr[i].g = i - 3; arr[i].h = 56; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c index 8c541fe44a4..ab841205e4f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c @@ -10,8 +10,6 @@ typedef struct { unsigned char b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -64,8 +62,7 @@ int main (void) { arr[i].a = i; arr[i].b = i * 2; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c index 55eddf5e9fd..0afd50db0b8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c @@ -10,8 +10,6 @@ typedef struct { unsigned char b; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -47,8 +45,7 @@ int main (void) { arr[i].a = i; arr[i].b = i * 2; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2-big-array.c index 3dd5c0610de..ef532251465 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2-big-array.c @@ -18,8 +18,6 @@ typedef struct { s check_res[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -83,8 +81,7 @@ int main (void) check_res[i].h = arr[i].f; check_res[i].g = arr[i].f - arr[i].b; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c index 23cea24540d..04f18fbb591 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -71,8 +69,7 @@ int main (void) arr[i].f = i * 2 + 2; arr[i].g = i - 3; arr[i].h = 56; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c index 450784b92cc..28ba4170fba 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-big-array.c @@ -18,8 +18,6 @@ typedef struct { s check_res[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -105,8 +103,7 @@ int main (void) check_res[i].h = arr[i].c; check_res[i].g = arr[i].b + arr[i].c; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c index b36ab736791..69b970ef33b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr, int n) { @@ -103,8 +101,7 @@ int main (void) arr[i].f = 16; arr[i].g = 3; arr[i].h = 56; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr, N-2); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c index 1b36df53d73..901b1a925bd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -91,8 +89,7 @@ int main (void) arr[i].f = i * 5; arr[i].g = i - 3; arr[i].h = 56; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7-big-array.c index ac93099372e..b703e636b49 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7-big-array.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7-big-array.c @@ -18,8 +18,6 @@ typedef struct { s check_res[N]; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -93,8 +91,7 @@ int main (void) check_res[i].h = arr[i].d; check_res[i].g = u + t; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c index dfd71b93b5f..764f10d0ada 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -76,8 +74,7 @@ int main (void) arr[i].f = i * 5; arr[i].g = i - 3; arr[i].h = 67; - if (y) /* Avoid vectorization. 
*/ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c index 6cc3bde1509..35bab79ce82 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c @@ -16,8 +16,6 @@ typedef struct { unsigned char h; } s; -volatile int y = 0; - __attribute__ ((noinline)) int main1 (s *arr) { @@ -78,8 +76,7 @@ int main (void) arr[i].f = i + 5; arr[i].g = i + 3; arr[i].h = 67; - if (y) /* Avoid vectorization. */ - abort (); + asm volatile ("" ::: "memory"); } main1 (arr); diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c index b14f0091ff5..6fa9e4e2822 100644 --- a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c @@ -1,5 +1,5 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ +/* { dg-options "-O2 --save-temps -ftree-vectorize -march=armv8-a" } */ /* Each function uses the correspoding 'CLASS' in Marco CHECK (aarch64_simd_valid_immediate). 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxmin.c b/gcc/testsuite/gcc.target/aarch64/fmaxmin.c index 4447e33f7ee..a128e953a73 100644 --- a/gcc/testsuite/gcc.target/aarch64/fmaxmin.c +++ b/gcc/testsuite/gcc.target/aarch64/fmaxmin.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-vect-cost-model -save-temps" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-vect-cost-model -save-temps -march=armv8-a" } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c b/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c index 4ac3ab73444..a40f34a2dc8 100644 --- a/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c +++ b/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline -fno-vect-cost-model" } */ +/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline -fno-vect-cost-model -march=armv8-a" } */ #define N 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c index ff6f68350eb..0b6c66a93d9 100644 --- a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c @@ -1,5 +1,5 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 --save-temps -ftree-vectorize" } */ +/* { dg-options "-O2 --save-temps -ftree-vectorize -march=armv8-a" } */ /* Each function uses the correspoding 'CLASS' in Marco CHECK (aarch64_simd_valid_immediate). 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/pr62178.c b/gcc/testsuite/gcc.target/aarch64/pr62178.c index 1bf6d838d3a..158de4335ca 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr62178.c +++ b/gcc/testsuite/gcc.target/aarch64/pr62178.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1]; diff --git a/gcc/testsuite/gcc.target/aarch64/pr70044.c b/gcc/testsuite/gcc.target/aarch64/pr70044.c index 1a84941dd7e..6080a074f26 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr70044.c +++ b/gcc/testsuite/gcc.target/aarch64/pr70044.c @@ -11,4 +11,4 @@ main (int argc, char **argv) } /* Check that the frame pointer really is created. */ -/* { dg-final { scan-lto-assembler "add x29, sp," } } */ +/* { dg-final { scan-lto-assembler "(mov|add) x29, sp" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr71727-2.c b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c index 2bc803ab330..be4150a6c5f 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr71727-2.c +++ b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mstrict-align -O3" } */ +/* { dg-options "-mstrict-align -O3 -march=armv8-a" } */ unsigned char foo(const unsigned char *buffer, unsigned int length) { diff --git a/gcc/testsuite/gcc.target/aarch64/pr78733.c b/gcc/testsuite/gcc.target/aarch64/pr78733.c index ce462cedf9f..3cdb3ba7373 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr78733.c +++ b/gcc/testsuite/gcc.target/aarch64/pr78733.c @@ -7,4 +7,5 @@ t (void) return (__int128)1 << 80; } -/* { dg-final { scan-assembler "adr" } } */ +/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */ +/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr79041-2.c b/gcc/testsuite/gcc.target/aarch64/pr79041-2.c index a889dfdd895..62856f10438 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr79041-2.c +++ 
b/gcc/testsuite/gcc.target/aarch64/pr79041-2.c @@ -8,5 +8,6 @@ t (void) return (__int128)1 << 80; } -/* { dg-final { scan-assembler "adr" } } */ +/* { dg-final { scan-assembler "\tmov\tx0, 0" } } */ +/* { dg-final { scan-assembler "\tmov\tx1, 65536" } } */ /* { dg-final { scan-assembler-not "adrp" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-1.c b/gcc/testsuite/gcc.target/aarch64/saddw-1.c index 7500fb82444..f8f616a616c 100644 --- a/gcc/testsuite/gcc.target/aarch64/saddw-1.c +++ b/gcc/testsuite/gcc.target/aarch64/saddw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int t6(int len, void * dummy, short * __restrict x) diff --git a/gcc/testsuite/gcc.target/aarch64/saddw-2.c b/gcc/testsuite/gcc.target/aarch64/saddw-2.c index 5d9c8d9edc2..274d23fe79d 100644 --- a/gcc/testsuite/gcc.target/aarch64/saddw-2.c +++ b/gcc/testsuite/gcc.target/aarch64/saddw-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int t6(int len, void * dummy, int * __restrict x) diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c new file mode 100644 index 00000000000..2ce38483b6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +extern void arf (unsigned long int *, unsigned long int *); +void +frob () +{ + unsigned long int num[1000]; + unsigned long int den[1000]; + arf (den, num); +} + +/* This verifies that the scheduler did not break the dependencies + by adjusting the offsets within the probe and that the scheduler + did not reorder around the stack probes. */ +/* { dg-final { scan-assembler-times "sub\\tsp, sp, #4096\\n\\tstr\\txzr, .sp, 4088." 
3 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c new file mode 100644 index 00000000000..d8886835989 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-13.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +#define ARG32(X) X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X +#define ARG192(X) ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X),ARG32(X) +void out1(ARG192(__int128)); +int t1(int); + +int t3(int x) +{ + if (x < 1000) + return t1 (x) + 1; + + out1 (ARG192(1)); + return 0; +} + + + +/* This test creates a large (> 1k) outgoing argument area that needs + to be probed. We don't test the exact size of the space or the + exact offset to make the test a little less sensitive to trivial + output changes. */ +/* { dg-final { scan-assembler-times "sub\\tsp, sp, #....\\n\\tstr\\txzr, \\\[sp" 1 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c new file mode 100644 index 00000000000..59ffe01376d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-14.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +int t1(int); + +int t2(int x) +{ + char *p = __builtin_alloca (4050); + x = t1 (x); + return p[x]; +} + + +/* This test has a constant sized alloca that is smaller than the + probe interval. But it actually requires two probes instead + of one because of the optimistic assumptions we made in the + aarch64 prologue code WRT probing state. + + The form can change quite a bit so we just check for two + probes without looking at the actual address. 
*/ +/* { dg-final { scan-assembler-times "str\\txzr," 2 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c new file mode 100644 index 00000000000..e06db6dc2f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-15.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=12" } */ +/* { dg-require-effective-target supports_stack_clash_protection } */ + +int t1(int); + +int t2(int x) +{ + char *p = __builtin_alloca (x); + x = t1 (x); + return p[x]; +} + + +/* This test has a variable sized alloca. It requires 3 probes. + One in the loop, one for the residual and at the end of the + alloca area. + + The form can change quite a bit so we just check for two + probes without looking at the actual address. */ +/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */ + + + diff --git a/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c b/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c index 95c8f696fee..2691250f79e 100644 --- a/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c +++ b/gcc/testsuite/gcc.target/aarch64/subs_compare_1.c @@ -11,5 +11,5 @@ foo (int a, int b) return 0; } -/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */ +/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c b/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c index 60c6d9e5ccd..d343acc1195 100644 --- a/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c +++ b/gcc/testsuite/gcc.target/aarch64/subs_compare_2.c @@ -11,5 +11,5 @@ foo (int a, int b) return 0; } -/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 } 
} */ +/* { dg-final { scan-assembler-times "subs\\tw\[0-9\]+, w\[0-9\]+, #4" 1 { xfail *-*-* } } } */ /* { dg-final { scan-assembler-not "cmp\\tw\[0-9\]+, w\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c new file mode 100644 index 00000000000..1a61d6a7f40 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_arith_1.c @@ -0,0 +1,121 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_REGREG_OPS(TYPE, OP, NAME) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP src[i]; \ +} + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP VALUE; \ +} + +#define DO_ARITH_OPS(TYPE, OP, NAME) \ + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ + DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ + DO_IMMEDIATE_OPS (255, TYPE, OP, NAME ## 255); \ + DO_IMMEDIATE_OPS (256, TYPE, OP, NAME ## 256); \ + DO_IMMEDIATE_OPS (257, TYPE, OP, NAME ## 257); \ + DO_IMMEDIATE_OPS (65280, TYPE, OP, NAME ## 65280); \ + DO_IMMEDIATE_OPS (65281, TYPE, OP, NAME ## 65281); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); + +DO_ARITH_OPS (int8_t, +, add) +DO_ARITH_OPS (int16_t, +, add) +DO_ARITH_OPS (int32_t, +, add) +DO_ARITH_OPS (int64_t, +, add) +DO_ARITH_OPS (int8_t, -, minus) +DO_ARITH_OPS (int16_t, -, minus) +DO_ARITH_OPS (int32_t, -, minus) +DO_ARITH_OPS (int64_t, -, minus) + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #251\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #255\n} 4 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, 
#1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 1 } } */ + +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #1\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #5\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */ +/* { dg-final { 
scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, 
z[0-9]+\.d, #1\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_1.c new file mode 100644 index 00000000000..1051fd1f7f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_1.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#define LOOP(TYPE) \ + void \ + f_##TYPE##_1 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i] += a[i - 1]; \ + } \ + \ + void \ + f_##TYPE##_2 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i] += a[i - 2]; \ + } \ + \ + void \ + f_##TYPE##_5 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i] += a[i - 5]; \ + } + +LOOP (char) +LOOP (short) +LOOP (float) +LOOP (double) + +/* { dg-final { scan-assembler-times {\tstrb\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tstrh\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\td[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x[0-9]+, x[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 4 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x[0-9]+, x[0-9]+\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x[0-9]+, x[0-9]+, lsl 1\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_1_run.c 
new file mode 100644 index 00000000000..0f280b04f0b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_1_run.c @@ -0,0 +1,37 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#include "sve_cap_1.c" + +#define N 129 + +#define F(X) (((X) % 5) * (X)) + +#define TEST_LOOP(TYPE, M) \ + { \ + TYPE a[N + M]; \ + for (int i = 0; i < N + M; ++i) \ + a[i] = F (i); \ + f_##TYPE##_##M (a + M, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE x = a[i]; \ + TYPE y = F (i + M); \ + if (a[i + M] != (TYPE) (x + y)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_LOOPS(TYPE) \ + TEST_LOOP (TYPE, 1) \ + TEST_LOOP (TYPE, 2) \ + TEST_LOOP (TYPE, 5) + +int +main (void) +{ + TEST_LOOPS (char); + TEST_LOOPS (short); + TEST_LOOPS (float); + TEST_LOOPS (double); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_2.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_2.c new file mode 100644 index 00000000000..d46a08c2ee1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_2.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#define LOOP(TYPE) \ + void __attribute__ ((weak)) \ + f_##TYPE##_1 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 2]; \ + a[i * 2 + 1] += a[i * 2 - 1]; \ + } \ + } \ + \ + void __attribute__ ((weak)) \ + f_##TYPE##_2 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 4]; \ + a[i * 2 + 1] += a[i * 2 - 3]; \ + } \ + } \ + \ + void __attribute__ ((weak)) \ + f_##TYPE##_5 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 10]; \ + a[i * 2 + 1] += a[i * 2 - 9]; \ + } \ + } + +LOOP (char) +LOOP (short) +LOOP (float) +LOOP (double) + +/* { dg-final { scan-assembler-times {\tstrb\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\t} 2 } } */ +/* 
{ dg-final { scan-assembler-times {\tstp\ts[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tstp\td[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld1b\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 4 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_2_run.c new file mode 100644 index 00000000000..0f8ed957c79 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_2_run.c @@ -0,0 +1,44 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#include "sve_cap_2.c" + +#define N 129 + +#define F(X) (((X) % 5) * (X)) +#define G(X) ((X) ^ 39) + +#define TEST_LOOP(TYPE, M) \ + { \ + TYPE a[(N + M) * 2]; \ + for (int i = 0; i < N + M; ++i) \ + { \ + a[i * 2] = F (i); \ + a[i * 2 + 1] = G (i); \ + } \ + f_##TYPE##_##M (a + M * 2, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE x0 = a[i * 2]; \ + TYPE y0 = F (i + M); \ + TYPE x1 = a[i * 2 + 1]; \ + TYPE y1 = G (i + M); \ + if (a[(i + M) * 2] != (TYPE) (x0 + y0) \ + || a[(i + M) * 2 + 1] != (TYPE) (x1 + y1)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_LOOPS(TYPE) \ + TEST_LOOP (TYPE, 1) \ + TEST_LOOP (TYPE, 2) \ + TEST_LOOP (TYPE, 5) + +int +main (void) +{ + TEST_LOOPS (char); + TEST_LOOPS (short); + TEST_LOOPS (float); + TEST_LOOPS (double); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_3.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_3.c new file mode 100644 index 00000000000..6515465b7f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_3.c @@ -0,0 
+1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model -msve-vector-bits=scalable" } */ + +#define LOOP(TYPE) \ + void __attribute__ ((weak)) \ + f_##TYPE##_1 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 2]; \ + a[i * 2 + 1] -= a[i * 2 - 1]; \ + } \ + } \ + \ + void __attribute__ ((weak)) \ + f_##TYPE##_2 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 4]; \ + a[i * 2 + 1] -= a[i * 2 - 3]; \ + } \ + } \ + \ + void __attribute__ ((weak)) \ + f_##TYPE##_5 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += a[i * 2 - 10]; \ + a[i * 2 + 1] -= a[i * 2 - 9]; \ + } \ + } + +LOOP (char) +LOOP (short) +LOOP (float) +LOOP (double) + +/* { dg-final { scan-assembler-times {\tstrb\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tstp\ts[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tstp\td[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2b\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld2w\t} 4 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t} 4 } } */ + +/* { dg-final { scan-assembler-times {\tst2b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_3_run.c new file mode 100644 index 00000000000..fe26162a812 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_3_run.c @@ -0,0 +1,44 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#include "sve_cap_3.c" + +#define N 129 + +#define F(X) (((X) % 5) * (X)) 
+#define G(X) ((X) ^ 39) + +#define TEST_LOOP(TYPE, M) \ + { \ + TYPE a[(N + M) * 2]; \ + for (int i = 0; i < N + M; ++i) \ + { \ + a[i * 2] = F (i); \ + a[i * 2 + 1] = G (i); \ + } \ + f_##TYPE##_##M (a + M * 2, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE x0 = a[i * 2]; \ + TYPE y0 = F (i + M); \ + TYPE x1 = a[i * 2 + 1]; \ + TYPE y1 = G (i + M); \ + if (a[(i + M) * 2] != (TYPE) (y0 + x0) \ + || a[(i + M) * 2 + 1] != (TYPE) (y1 - x1)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_LOOPS(TYPE) \ + TEST_LOOP (TYPE, 1) \ + TEST_LOOP (TYPE, 2) \ + TEST_LOOP (TYPE, 5) + +int +main (void) +{ + TEST_LOOPS (char); + TEST_LOOPS (short); + TEST_LOOPS (float); + TEST_LOOPS (double); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c new file mode 100644 index 00000000000..b22828d621b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_4.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#define LOOP(TYPE) \ + void \ + f_##TYPE##_1 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i * 5] += a[i * 5 - 5]; \ + } \ + \ + void \ + f_##TYPE##_2 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i * 5] += a[i * 5 - 10]; \ + } \ + \ + void \ + f_##TYPE##_5 (TYPE *a, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + a[i * 5] += a[i * 5 - 25]; \ + } + +LOOP (char) +LOOP (short) +LOOP (float) +LOOP (double) + +/* At the moment we can't use extending loads and truncating stores. + Please add ld and st scan-assemblers below if that changes. 
*/ +/* { dg-final { scan-assembler-times {\tstrb\t} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tstrh\t} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tstr\ts[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\td[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 4 } } */ + +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+\.s, sxtw 2\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+\.d\]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_4_run.c new file mode 100644 index 00000000000..f39bc7fc3cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_4_run.c @@ -0,0 +1,37 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#include "sve_cap_4.c" + +#define N 129 + +#define F(X) (((X) % 5) * (X)) + +#define TEST_LOOP(TYPE, M) \ + { \ + TYPE a[(N + M) * 5]; \ + for (int i = 0; i < N + M; ++i) \ + a[i * 5] = F (i); \ + f_##TYPE##_##M (a + M * 5, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE x = a[i * 5]; \ + TYPE y = F (i + M); \ + if (a[(i + M) * 5] != (TYPE) (x + y)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_LOOPS(TYPE) \ + TEST_LOOP (TYPE, 1) \ + TEST_LOOP (TYPE, 2) \ + TEST_LOOP (TYPE, 5) + +int +main (void) +{ + TEST_LOOPS (char); + TEST_LOOPS (short); + TEST_LOOPS (float); + TEST_LOOPS (double); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_5.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_5.c new file mode 100644 index 00000000000..4d4987773b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_5.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +void __attribute__ ((noinline, noclone)) +f (double *x, float *y, int n) +{ + for (int i = 0; i < n; ++i) + { + x[i * 3 + 18] = x[i * 3 + 0] + y[i]; + x[i * 3 + 19] = x[i * 3 + 1] - y[i]; + x[i * 3 + 20] = x[i * 3 + 2]; + } +} + +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tpunpklo\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tpunpkhi\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvt\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cap_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cap_5_run.c new file mode 100644 index 00000000000..57fd625d20a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cap_5_run.c @@ -0,0 +1,40 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -fno-vect-cost-model" } */ + +#include "sve_cap_5.c" + +#define COUNT 5 +#define N ((COUNT + 2) * 6) + +int +main (void) +{ + double x[N * 3]; + float y[N]; + for (int i = 0; i < N; ++i) + { + x[i * 3 + 0] = i * 2; + x[i * 3 + 1] = i * 3; + x[i * 3 + 2] = i * 5; + y[i] = i * 4; + } + f (x, y, COUNT * 6); + for (int i = 0; i < N; ++i) + { + if (i >= 6 && i < (COUNT + 1) * 6) + { + if (x[i * 3 + 0] != x[i * 3 - 18] + (i - 6) * 4 + || x[i * 3 + 1] != x[i * 3 - 17] - (i - 6) * 4 + || x[i * 3 + 2] != x[i * 3 - 16]) + __builtin_abort (); + } + else + { + if (x[i * 3 + 0] != i * 2 + || x[i * 3 + 1] != i * 3 + || x[i * 3 + 2] != i * 5) + __builtin_abort (); + } + } + return 0; +} diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c new file mode 100644 index 00000000000..a176d9ce251 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c @@ -0,0 +1,22 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define N 32 + +/* Simple condition reduction. */ + +int +condition_reduction (int *a, int min_v) +{ + int last = 66; /* High start value. */ + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = i; + + return last; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c new file mode 100644 index 00000000000..8e6444e4239 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c @@ -0,0 +1,24 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_1.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + int a[N] = { + 11, -12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, -3, 4, 5, 6, 7, -8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 + }; + + int ret = condition_reduction (a, 1); + + if (ret != 17) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c new file mode 100644 index 00000000000..dcae41f5425 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c @@ -0,0 +1,27 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#if 
!defined(TYPE) +#define TYPE unsigned int +#endif + +#define N 254 + +/* Non-simple condition reduction. */ + +TYPE +condition_reduction (TYPE *a, TYPE min_v) +{ + TYPE last = 65; + + for (TYPE i = 0; i < N; i++) + if (a[i] < min_v) + last = a[i]; + + return last; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c new file mode 100644 index 00000000000..0503ba36c3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c @@ -0,0 +1,25 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_2.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + unsigned int a[N] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 + }; + __builtin_memset (a+32, 43, (N-32)*sizeof (int)); + + unsigned int ret = condition_reduction (a, 16); + + if (ret != 10) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c new file mode 100644 index 00000000000..1061194a08e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c @@ -0,0 +1,11 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define TYPE unsigned char + +#include "sve_clastb_2.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition 
expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c new file mode 100644 index 00000000000..90c3e4a0cf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c @@ -0,0 +1,25 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_3.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + unsigned char a[N] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 + }; + __builtin_memset (a+32, 43, N-32); + + unsigned char ret = condition_reduction (a, 16); + + if (ret != 10) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c new file mode 100644 index 00000000000..698d958693a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c @@ -0,0 +1,11 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define TYPE short + +#include "sve_clastb_2.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 
2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c new file mode 100644 index 00000000000..d0337ab300d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c @@ -0,0 +1,25 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_4.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + short a[N] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 + }; + __builtin_memset (a+32, 43, (N-32)*sizeof (short)); + + short ret = condition_reduction (a, 16); + + if (ret != 10) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c new file mode 100644 index 00000000000..655f95f410a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c @@ -0,0 +1,11 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define TYPE long + +#include "sve_clastb_2.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 
2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c new file mode 100644 index 00000000000..573787233d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c @@ -0,0 +1,25 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_5.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + long a[N] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32 + }; + __builtin_memset (a+32, 43, (N-32)*sizeof (long)); + + long ret = condition_reduction (a, 16); + + if (ret != 10) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c new file mode 100644 index 00000000000..bf1bc1a346a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c @@ -0,0 +1,28 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define N 32 + +#ifndef TYPE +#define TYPE float +#endif + +/* Non-integer data types. */ + +TYPE +condition_reduction (TYPE *a, TYPE min_v) +{ + TYPE last = 0; + + for (int i = 0; i < N; i++) + if (a[i] < min_v) + last = a[i]; + + return last; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 
2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c new file mode 100644 index 00000000000..4c760daba89 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c @@ -0,0 +1,24 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_6.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + float a[N] = { + 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, + 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, + 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, + 31.111, 32.322 + }; + + float ret = condition_reduction (a, 16.7); + + if (ret != (float)10.6) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c new file mode 100644 index 00000000000..12e53b75e8a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c @@ -0,0 +1,11 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +#define TYPE double +#include "sve_clastb_6.c" + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing condition reduction with CLASTB." 
2 "vect" } } */ +/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c new file mode 100644 index 00000000000..d0001a923e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c @@ -0,0 +1,24 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_clastb_7.c" + +extern void abort (void) __attribute__ ((noreturn)); + +int +main (void) +{ + double a[N] = { + 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20, + 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6, + 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30, + 31.111, 32.322 + }; + + double ret = condition_reduction (a, 16.7); + + if (ret != (double)10.6) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1.c new file mode 100644 index 00000000000..707a08e0faa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(TYPE, NAME, OP) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE##_##NAME (TYPE *__restrict x, \ + TYPE *__restrict y, \ + TYPE *__restrict z, \ + TYPE *__restrict pred, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + x[i] = pred[i] != 1 ? 
y[i] OP z[i] : y[i]; \ + } + +#define TEST_INT_TYPE(TYPE) \ + TEST (TYPE, div, /) + +#define TEST_FP_TYPE(TYPE) \ + TEST (TYPE, add, +) \ + TEST (TYPE, sub, -) \ + TEST (TYPE, mul, *) \ + TEST (TYPE, div, /) + +#define TEST_ALL \ + TEST_INT_TYPE (int8_t) \ + TEST_INT_TYPE (uint8_t) \ + TEST_INT_TYPE (int16_t) \ + TEST_INT_TYPE (uint16_t) \ + TEST_INT_TYPE (int32_t) \ + TEST_INT_TYPE (uint32_t) \ + TEST_INT_TYPE (int64_t) \ + TEST_INT_TYPE (uint64_t) \ + TEST_FP_TYPE (float) \ + TEST_FP_TYPE (double) + +TEST_ALL + +/* { dg-final { scan-assembler-not {\t.div\tz[0-9]+\.b} } } */ \ +/* { dg-final { scan-assembler-not {\t.div\tz[0-9]+\.h} } } */ \ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 7 } } */ +/* At present we don't vectorize the uint8_t or uint16_t loops because the + division is done directly in the narrow type, rather than being widened + to int first. */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* We fail to optimize away the SEL for the int8_t and int16_t loops, + because the 32-bit result is converted before selection. 
*/ +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1_run.c new file mode 100644 index 00000000000..5ad84ead7fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_1_run.c @@ -0,0 +1,33 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cond_arith_1.c" + +#define N 99 + +#undef TEST +#define TEST(TYPE, NAME, OP) \ + { \ + TYPE x[N], y[N], z[N], pred[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + y[i] = i * i; \ + z[i] = ((i + 2) % 3) * (i + 1); \ + pred[i] = i % 3; \ + } \ + test_##TYPE##_##NAME (x, y, z, pred, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i % 3 != 1 ? y[i] OP z[i] : y[i]; \ + if (x[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2.c new file mode 100644 index 00000000000..e525ce8898a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(DATA_TYPE, PRED_TYPE, NAME, OP) \ + void __attribute__ ((noinline, noclone)) \ + test_##DATA_TYPE##_##PRED_TYPE##_##NAME (DATA_TYPE *__restrict x, \ + DATA_TYPE *__restrict y, \ + DATA_TYPE *__restrict z, \ + PRED_TYPE *__restrict pred, \ + int n) \ + { \ + for (int i = 0; i < n; ++i) \ + x[i] = pred[i] != 1 ? 
y[i] OP z[i] : y[i]; \ + } + +#define TEST_INT_TYPE(DATA_TYPE, PRED_TYPE) \ + TEST (DATA_TYPE, PRED_TYPE, div, /) + +#define TEST_FP_TYPE(DATA_TYPE, PRED_TYPE) \ + TEST (DATA_TYPE, PRED_TYPE, add, +) \ + TEST (DATA_TYPE, PRED_TYPE, sub, -) \ + TEST (DATA_TYPE, PRED_TYPE, mul, *) \ + TEST (DATA_TYPE, PRED_TYPE, div, /) + +#define TEST_ALL \ + TEST_INT_TYPE (int32_t, int8_t) \ + TEST_INT_TYPE (uint32_t, int8_t) \ + TEST_INT_TYPE (int32_t, int16_t) \ + TEST_INT_TYPE (uint32_t, int16_t) \ + TEST_INT_TYPE (int64_t, int8_t) \ + TEST_INT_TYPE (uint64_t, int8_t) \ + TEST_INT_TYPE (int64_t, int16_t) \ + TEST_INT_TYPE (uint64_t, int16_t) \ + TEST_INT_TYPE (int64_t, int32_t) \ + TEST_INT_TYPE (uint64_t, int32_t) \ + TEST_FP_TYPE (float, int8_t) \ + TEST_FP_TYPE (float, int16_t) \ + TEST_FP_TYPE (double, int8_t) \ + TEST_FP_TYPE (double, int16_t) \ + TEST_FP_TYPE (double, int32_t) + +TEST_ALL + +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2_run.c new file mode 100644 index 00000000000..623c27ddb65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_2_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cond_arith_2.c" + +#define N 99 + +#undef TEST +#define TEST(DATA_TYPE, PRED_TYPE, NAME, OP) \ + { \ + DATA_TYPE x[N], y[N], z[N]; \ + PRED_TYPE pred[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + y[i] = i * i; \ + z[i] = ((i + 2) % 3) * (i + 1); \ + pred[i] = i % 3; \ + } \ + test_##DATA_TYPE##_##PRED_TYPE##_##NAME (x, y, z, pred, N); \ + for (int i = 0; i < N; ++i) \ + { \ + DATA_TYPE expected = i % 3 != 1 ? y[i] OP z[i] : y[i]; \ + if (x[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3.c new file mode 100644 index 00000000000..264366f99f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(TYPE, NAME, OP) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE##_##NAME (TYPE *__restrict x, \ + TYPE *__restrict y, \ + TYPE z1, TYPE z2, \ + TYPE *__restrict pred, int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + x[i] = (pred[i] != 1 ? y[i] OP z1 : y[i]); \ + x[i + 1] = (pred[i + 1] != 1 ? 
y[i + 1] OP z2 : y[i + 1]); \ + } \ + } + +#define TEST_INT_TYPE(TYPE) \ + TEST (TYPE, div, /) + +#define TEST_FP_TYPE(TYPE) \ + TEST (TYPE, add, +) \ + TEST (TYPE, sub, -) \ + TEST (TYPE, mul, *) \ + TEST (TYPE, div, /) + +#define TEST_ALL \ + TEST_INT_TYPE (int32_t) \ + TEST_INT_TYPE (uint32_t) \ + TEST_INT_TYPE (int64_t) \ + TEST_INT_TYPE (uint64_t) \ + TEST_FP_TYPE (float) \ + TEST_FP_TYPE (double) + +TEST_ALL + +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 12 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 6 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z,} 12 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7],} 6 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3_run.c new file mode 100644 index 00000000000..38736e11006 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_3_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cond_arith_3.c" + +#define N 98 + +#undef TEST +#define TEST(TYPE, NAME, OP) \ + { \ + TYPE x[N], y[N], pred[N], z[2] = { 5, 7 }; \ + for (int i = 0; i < N; ++i) \ + { \ + y[i] = i * i; \ + pred[i] = i % 3; \ + } \ + test_##TYPE##_##NAME (x, y, z[0], z[1], pred, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i % 3 != 1 ? y[i] OP z[i & 1] : y[i]; \ + if (x[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4.c new file mode 100644 index 00000000000..8517aa5a474 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4.c @@ -0,0 +1,85 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(DATA_TYPE, OTHER_TYPE, NAME, OP) \ + void __attribute__ ((noinline, noclone)) \ + test_##DATA_TYPE##_##OTHER_TYPE##_##NAME (DATA_TYPE *__restrict x, \ + DATA_TYPE *__restrict y, \ + DATA_TYPE z1, DATA_TYPE z2, \ + DATA_TYPE *__restrict pred, \ + OTHER_TYPE *__restrict foo, \ + int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + x[i] = (pred[i] != 1 ? y[i] OP z1 : y[i]); \ + x[i + 1] = (pred[i + 1] != 1 ? 
y[i + 1] OP z2 : y[i + 1]); \ + foo[i] += 1; \ + foo[i + 1] += 2; \ + } \ + } + +#define TEST_INT_TYPE(DATA_TYPE, OTHER_TYPE) \ + TEST (DATA_TYPE, OTHER_TYPE, div, /) + +#define TEST_FP_TYPE(DATA_TYPE, OTHER_TYPE) \ + TEST (DATA_TYPE, OTHER_TYPE, add, +) \ + TEST (DATA_TYPE, OTHER_TYPE, sub, -) \ + TEST (DATA_TYPE, OTHER_TYPE, mul, *) \ + TEST (DATA_TYPE, OTHER_TYPE, div, /) + +#define TEST_ALL \ + TEST_INT_TYPE (int32_t, int8_t) \ + TEST_INT_TYPE (int32_t, int16_t) \ + TEST_INT_TYPE (uint32_t, int8_t) \ + TEST_INT_TYPE (uint32_t, int16_t) \ + TEST_INT_TYPE (int64_t, int8_t) \ + TEST_INT_TYPE (int64_t, int16_t) \ + TEST_INT_TYPE (int64_t, int32_t) \ + TEST_INT_TYPE (uint64_t, int8_t) \ + TEST_INT_TYPE (uint64_t, int16_t) \ + TEST_INT_TYPE (uint64_t, int32_t) \ + TEST_FP_TYPE (float, int8_t) \ + TEST_FP_TYPE (float, int16_t) \ + TEST_FP_TYPE (double, int8_t) \ + TEST_FP_TYPE (double, int16_t) \ + TEST_FP_TYPE (double, int32_t) + +TEST_ALL + +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m,} 6 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m,} 14 } } */ + +/* The load XFAILs for fixed-length SVE account for extra loads from the + 
constant pool. */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7],} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z,} 12 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7],} 12 } } */ + +/* 72 for x operations, 6 for foo operations. */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 78 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* 36 for x operations, 6 for foo operations. */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 42 } } */ + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z,} 168 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7],} 84 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4_run.c new file mode 100644 index 00000000000..e9b5b938d1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cond_arith_4_run.c @@ -0,0 +1,35 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cond_arith_4.c" + +#define N 98 + +#undef TEST +#define TEST(DATA_TYPE, OTHER_TYPE, NAME, OP) \ + { \ + DATA_TYPE x[N], y[N], pred[N], z[2] = { 5, 7 }; \ + OTHER_TYPE foo[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + y[i] = i * i; \ + pred[i] = i % 3; \ + foo[i] = i * 5; \ + } \ + test_##DATA_TYPE##_##OTHER_TYPE##_##NAME (x, y, z[0], z[1], \ + pred, foo, N); \ + for (int i = 0; i < N; ++i) \ + { \ + DATA_TYPE expected = i % 3 != 1 ? 
y[i] OP z[i & 1] : y[i]; \ + if (x[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C new file mode 100644 index 00000000000..4937e7f10e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_1.C @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef signed char v32qi __attribute__((vector_size(32))); + +v32qi +foo (v32qi x, v32qi y) +{ + return (v32qi) { -1, 0, 0, -1, -1, -1, 0, 0, + -1, -1, -1, -1, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y; +} + +/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ +/* { dg-final { scan-assembler {\t\.byte\t57\n\t\.byte\t15\n\t\.byte\t(255|-1)\n\t\.byte\t0\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C new file mode 100644 index 00000000000..3de4a8ccd00 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_2.C @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef short v16hi __attribute__((vector_size(32))); + +v16hi +foo (v16hi x, v16hi y) +{ + return (v16hi) { -1, 0, 0, -1, -1, -1, 0, 0, + -1, -1, -1, -1, 0, 0, 0, 0 } ? 
x : y; +} + +/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ +/* { dg-final { scan-assembler {\t\.byte\t65\n\t\.byte\t5\n\t\.byte\t85\n\t\.byte\t0\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C new file mode 100644 index 00000000000..8185f7baa76 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_3.C @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef int v8si __attribute__((vector_size(32))); + +v8si +foo (v8si x, v8si y) +{ + return (v8si) { -1, 0, 0, -1, -1, -1, 0, 0 } ? x : y; +} + +/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ +/* { dg-final { scan-assembler {\t\.byte\t1\n\t\.byte\t16\n\t\.byte\t17\n\t\.byte\t0\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C new file mode 100644 index 00000000000..b15da8a59e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_const_pred_4.C @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef long long v4di __attribute__((vector_size(32))); + +v4di +foo (v4di x, v4di y) +{ + return (v4di) { -1, 0, 0, -1 } ? 
x : y; +} + +/* { dg-final { scan-assembler {\tldr\tp[0-9]+,} } } */ +/* { dg-final { scan-assembler {\t\.byte\t1\n\t\.byte\t0\n\t\.byte\t0\n\t\.byte\t1\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c new file mode 100644 index 00000000000..86d3930e476 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vcvtf_16 (_Float16 *dst, int16_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (_Float16) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_32 (float *dst, int32_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (float) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_64 (double *dst, int64_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (double) src1[i]; +} + +/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c new file mode 100644 index 00000000000..9b431ad0ed4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_signed_1_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cvtf_signed_1.c" + +#define ARRAY_SIZE 47 + +#define VAL1 (i ^ 3) +#define VAL2 ((i * 3) - (15 * 3)) +#define VAL3 ((i * 0xffdfffef) - (11 * 0xffdfffef)) + +int __attribute__ ((optimize (1))) +main (void) +{ + static _Float16 array_dest16[ARRAY_SIZE]; + static float array_dest32[ARRAY_SIZE]; + 
static double array_dest64[ARRAY_SIZE]; + + int16_t array_source16[ARRAY_SIZE]; + int32_t array_source32[ARRAY_SIZE]; + int64_t array_source64[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); + } + + vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (_Float16) VAL1) + __builtin_abort (); + + vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest32[i] != (float) VAL2) + __builtin_abort (); + + vcvtf_64 (array_dest64, array_source64, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest64[i] != (double) VAL3) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c new file mode 100644 index 00000000000..0605307d1e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vcvtf_16 (_Float16 *dst, uint16_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (_Float16) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_32 (float *dst, uint32_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (float) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vcvtf_64 (double *dst, uint64_t *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (double) src1[i]; +} + +/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c new file mode 100644 index 00000000000..a4434cbf478 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_cvtf_unsigned_1_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_cvtf_unsigned_1.c" + +#define ARRAY_SIZE 65 + +#define VAL1 (i * 109) +#define VAL2 (i * 9456) +#define VAL3 (i * 0xfddff13f) + +int __attribute__ ((optimize (1))) +main (void) +{ + static _Float16 array_dest16[ARRAY_SIZE]; + static float array_dest32[ARRAY_SIZE]; + static double array_dest64[ARRAY_SIZE]; + + uint16_t array_source16[ARRAY_SIZE]; + uint32_t array_source32[ARRAY_SIZE]; + uint64_t array_source64[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); + } + + vcvtf_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (_Float16) VAL1) + __builtin_abort (); + + vcvtf_32 (array_dest32, array_source32, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest32[i] != (float) VAL2) + __builtin_abort (); + + vcvtf_64 (array_dest64, array_source64, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest64[i] != (double) VAL3) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.c new file mode 100644 index 00000000000..9fed379607b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1.c @@ -0,0 +1,138 @@ +/* { dg-do compile } */ +/* -fno-tree-loop-distribute-patterns prevents conversion to memset. 
*/ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) + +#define DEF_SET_IMM(TYPE, IMM, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +set_##TYPE##_##SUFFIX (TYPE *a) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + a[i] = IMM; \ +} + +/* --- VALID --- */ + +DEF_SET_IMM (int8_t, 0, imm_0) +DEF_SET_IMM (int16_t, 0, imm_0) +DEF_SET_IMM (int32_t, 0, imm_0) +DEF_SET_IMM (int64_t, 0, imm_0) + +DEF_SET_IMM (int8_t, -1, imm_m1) +DEF_SET_IMM (int16_t, -1, imm_m1) +DEF_SET_IMM (int32_t, -1, imm_m1) +DEF_SET_IMM (int64_t, -1, imm_m1) + +DEF_SET_IMM (int8_t, 1, imm_1) +DEF_SET_IMM (int16_t, 1, imm_1) +DEF_SET_IMM (int32_t, 1, imm_1) +DEF_SET_IMM (int64_t, 1, imm_1) + +DEF_SET_IMM (int8_t, 127, imm_127) +DEF_SET_IMM (int16_t, 127, imm_127) +DEF_SET_IMM (int32_t, 127, imm_127) +DEF_SET_IMM (int64_t, 127, imm_127) + +DEF_SET_IMM (int8_t, -128, imm_m128) +DEF_SET_IMM (int16_t, -128, imm_m128) +DEF_SET_IMM (int32_t, -128, imm_m128) +DEF_SET_IMM (int64_t, -128, imm_m128) + +// No uint8_t variant - size too large for a byte +DEF_SET_IMM (int16_t, 256, imm_256) +DEF_SET_IMM (int32_t, 256, imm_256) +DEF_SET_IMM (int64_t, 256, imm_256) + +// No uint8_t variant - size too large for a byte +DEF_SET_IMM (int16_t, 32512, imm_32512) +DEF_SET_IMM (int32_t, 32512, imm_32512) +DEF_SET_IMM (int64_t, 32512, imm_32512) + +// No uint8_t variant - size too large for a byte +DEF_SET_IMM (int16_t, -32768, imm_m32768) +DEF_SET_IMM (int32_t, -32768, imm_m32768) +DEF_SET_IMM (int64_t, -32768, imm_m32768) + +/* gcc will generate: + dup z0.b, 0x01 +*/ +DEF_SET_IMM (int16_t, 0x0101, imm_01_pat) +DEF_SET_IMM (int32_t, 0x01010101, imm_01_pat) +DEF_SET_IMM (int64_t, 0x0101010101010101LL, imm_01_pat) + +/* gcc will generate: + dup z0.h, 0x01 +*/ +DEF_SET_IMM (int32_t, 0x00010001, imm_0001_pat) +DEF_SET_IMM (int64_t, 0x0001000100010001LL, imm_0001_pat) + +/* gcc will generate: + dup z0.b, 0xFE 
(-2) +*/ +DEF_SET_IMM (int16_t, 0xFEFE, imm_FE_pat) +DEF_SET_IMM (int32_t, 0xFEFEFEFE, imm_FE_pat) +DEF_SET_IMM (int64_t, 0xFEFEFEFEFEFEFEFE, imm_FE_pat) + +/* gcc will generate: + dup z0.h, 0xFFFE (-2) +*/ +DEF_SET_IMM (int32_t, 0xFFFEFFFE, imm_FFFE_pat) +DEF_SET_IMM (int64_t, 0xFFFEFFFEFFFEFFFELL, imm_FFFE_pat) + +/* gcc will generate: + dup z0.h, 0xFE00 +*/ +DEF_SET_IMM (int32_t, 0xFE00FE00, imm_FE00_pat) +DEF_SET_IMM (int64_t, 0xFE00FE00FE00FE00LL, imm_FE00_pat) + + +/* --- INVALID --- */ + +// This shouldn't generate a dup as it's out of range, but also the compiler +// shouldn't assert! +DEF_SET_IMM (int32_t, 129, imm_m129) +DEF_SET_IMM (int32_t, 32513, imm_32513) +DEF_SET_IMM (int32_t, -32763, imm_m32763) + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-1\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #0\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #1\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #1\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #1\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #1\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #127\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #127\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #127\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #127\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-128\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-128\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #-128\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #-128\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #256\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #256\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #256\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #32512\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #32512\n} 
} } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #32512\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-32768\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.s, #-32768\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.d, #-32768\n} } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.b, #-2\n} } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #-2\n} 2 } } */ + +/* { dg-final { scan-assembler {\tmov\tz[0-9]+\.h, #-512\n} } } */ + +/* { dg-final { scan-assembler-not {#129\n} } } */ +/* { dg-final { scan-assembler-not {#32513\n} } } */ +/* { dg-final { scan-assembler-not {#-32763\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.c new file mode 100644 index 00000000000..237f44947ab --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_imm_1_run.c @@ -0,0 +1,70 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include "sve_dup_imm_1.c" + +#define TEST_SET_IMM(TYPE, IMM, SUFFIX) \ + { \ + TYPE v[NUM_ELEMS (TYPE)]; \ + set_##TYPE##_##SUFFIX (v); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (v[i] != (TYPE) IMM) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main (int argc, char **argv) +{ + TEST_SET_IMM (int8_t, 0, imm_0) + TEST_SET_IMM (int16_t, 0, imm_0) + TEST_SET_IMM (int32_t, 0, imm_0) + TEST_SET_IMM (int64_t, 0, imm_0) + + TEST_SET_IMM (int8_t, -1, imm_m1) + TEST_SET_IMM (int16_t, -1, imm_m1) + TEST_SET_IMM (int32_t, -1, imm_m1) + TEST_SET_IMM (int64_t, -1, imm_m1) + + TEST_SET_IMM (int8_t, 1, imm_1) + TEST_SET_IMM (int16_t, 1, imm_1) + TEST_SET_IMM (int32_t, 1, imm_1) + TEST_SET_IMM (int64_t, 1, imm_1) + + TEST_SET_IMM (int8_t, 127, imm_127) + TEST_SET_IMM (int16_t, 127, imm_127) + TEST_SET_IMM (int32_t, 127, imm_127) + TEST_SET_IMM (int64_t, 127, imm_127) + + TEST_SET_IMM (int8_t, -128, imm_m128) 
+ TEST_SET_IMM (int16_t, -128, imm_m128) + TEST_SET_IMM (int32_t, -128, imm_m128) + TEST_SET_IMM (int64_t, -128, imm_m128) + + TEST_SET_IMM (int16_t, 256, imm_256) + TEST_SET_IMM (int32_t, 256, imm_256) + TEST_SET_IMM (int64_t, 256, imm_256) + + TEST_SET_IMM (int16_t, 32512, imm_32512) + TEST_SET_IMM (int32_t, 32512, imm_32512) + TEST_SET_IMM (int64_t, 32512, imm_32512) + + TEST_SET_IMM (int16_t, -32768, imm_m32768) + TEST_SET_IMM (int32_t, -32768, imm_m32768) + TEST_SET_IMM (int64_t, -32768, imm_m32768) + + TEST_SET_IMM (int16_t, 0x0101, imm_01_pat) + TEST_SET_IMM (int32_t, 0x01010101, imm_01_pat) + TEST_SET_IMM (int64_t, 0x0101010101010101LL, imm_01_pat) + + TEST_SET_IMM (int32_t, 0x00010001, imm_0001_pat) + TEST_SET_IMM (int64_t, 0x0001000100010001LL, imm_0001_pat) + + TEST_SET_IMM (int16_t, 0xFEFE, imm_FE_pat) + TEST_SET_IMM (int32_t, 0xFEFEFEFE, imm_FE_pat) + TEST_SET_IMM (int64_t, 0xFEFEFEFEFEFEFEFE, imm_FE_pat) + + TEST_SET_IMM (int32_t, 0xFE00FE00, imm_FE00_pat) + TEST_SET_IMM (int64_t, 0xFE00FE00FE00FE00, imm_FE00_pat) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c new file mode 100644 index 00000000000..ea977207226 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_dup_lane_1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define MASK_2(X) X, X +#define MASK_4(X) MASK_2 (X), MASK_2 (X) +#define MASK_8(X) MASK_4 (X), MASK_4 (X) +#define MASK_16(X) MASK_8 (X), MASK_8 (X) +#define 
MASK_32(X) MASK_16 (X), MASK_16 (X) + +#define INDEX_4 v4di +#define INDEX_8 v8si +#define INDEX_16 v16hi +#define INDEX_32 v32qi + +#define DUP_LANE(TYPE, NUNITS, INDEX) \ + TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (INDEX) })); \ + } + +#define TEST_ALL(T) \ + T (v4di, 4, 0) \ + T (v4di, 4, 2) \ + T (v4di, 4, 3) \ + T (v8si, 8, 0) \ + T (v8si, 8, 5) \ + T (v8si, 8, 7) \ + T (v16hi, 16, 0) \ + T (v16hi, 16, 6) \ + T (v16hi, 16, 15) \ + T (v32qi, 32, 0) \ + T (v32qi, 32, 19) \ + T (v32qi, 32, 31) \ + T (v4df, 4, 0) \ + T (v4df, 4, 2) \ + T (v4df, 4, 3) \ + T (v8sf, 8, 0) \ + T (v8sf, 8, 5) \ + T (v8sf, 8, 7) \ + T (v16hf, 16, 0) \ + T (v16hf, 16, 6) \ + T (v16hf, 16, 15) \ + +TEST_ALL (DUP_LANE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[3\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[5\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[7\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[0\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[6\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[15\]} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[0\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[19\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[31\]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c new file mode 
100644 index 00000000000..1ec51aa2eaf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define MASK_2(X) X, X + 1 +#define MASK_4(X) MASK_2 (X), MASK_2 (X + 2) +#define MASK_8(X) MASK_4 (X), MASK_4 (X + 4) +#define MASK_16(X) MASK_8 (X), MASK_8 (X + 8) +#define MASK_32(X) MASK_16 (X), MASK_16 (X + 16) + +#define INDEX_4 v4di +#define INDEX_8 v8si +#define INDEX_16 v16hi +#define INDEX_32 v32qi + +#define DUP_LANE(TYPE, NUNITS, INDEX) \ + TYPE dup_##INDEX##_##TYPE (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (INDEX) })); \ + } + +#define TEST_ALL(T) \ + T (v4di, 4, 1) \ + T (v4di, 4, 2) \ + T (v4di, 4, 3) \ + T (v8si, 8, 1) \ + T (v8si, 8, 5) \ + T (v8si, 8, 7) \ + T (v16hi, 16, 1) \ + T (v16hi, 16, 6) \ + T (v16hi, 16, 15) \ + T (v32qi, 32, 1) \ + T (v32qi, 32, 19) \ + T (v32qi, 32, 31) \ + T (v4df, 4, 1) \ + T (v4df, 4, 2) \ + T (v4df, 4, 3) \ + T (v8sf, 8, 1) \ + T (v8sf, 8, 5) \ + T (v8sf, 8, 7) \ + T (v16hf, 16, 1) \ + T (v16hf, 16, 6) \ + T (v16hf, 16, 15) \ + +TEST_ALL (DUP_LANE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #4\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #8\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #12\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #19\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #20\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #24\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #28\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #30\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #31\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c new file mode 100644 index 00000000000..b93574e50f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ext_2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef int v8si __attribute__((vector_size (32))); + +void +foo (void) +{ + register v8si x asm ("z0"); + register v8si y asm ("z1"); + + asm volatile ("" : "=w" (y)); + x = __builtin_shuffle (y, y, (v8si) { 1, 2, 3, 4, 5, 6, 7, 8 }); + asm volatile ("" :: "w" (x)); +} + +/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */ +/* { dg-final { scan-assembler {\text\tz0\.b, z0\.b, z[01]\.b, #4\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c new file mode 100644 index 00000000000..1ba277ffa6d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_1.c @@ -0,0 +1,93 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + 
+typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v4di, 0) \ + T (int64_t, v4di, 1) \ + T (int64_t, v4di, 2) \ + T (int64_t, v4di, 3) \ + T (int32_t, v8si, 0) \ + T (int32_t, v8si, 1) \ + T (int32_t, v8si, 3) \ + T (int32_t, v8si, 4) \ + T (int32_t, v8si, 7) \ + T (int16_t, v16hi, 0) \ + T (int16_t, v16hi, 1) \ + T (int16_t, v16hi, 7) \ + T (int16_t, v16hi, 8) \ + T (int16_t, v16hi, 15) \ + T (int8_t, v32qi, 0) \ + T (int8_t, v32qi, 1) \ + T (int8_t, v32qi, 15) \ + T (int8_t, v32qi, 16) \ + T (int8_t, v32qi, 31) \ + T (double, v4df, 0) \ + T (double, v4df, 1) \ + T (double, v4df, 2) \ + T (double, v4df, 3) \ + T (float, v8sf, 0) \ + T (float, v8sf, 1) \ + T (float, v8sf, 3) \ + T (float, v8sf, 4) \ + T (float, v8sf, 7) \ + T (_Float16, v16hf, 0) \ + T (_Float16, v16hf, 1) \ + T (_Float16, v16hf, 7) \ + T (_Float16, v16hf, 8) \ + T (_Float16, v16hf, 15) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c new file mode 100644 index 00000000000..b163f28ef28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_2.c @@ -0,0 +1,93 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=512 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v8di __attribute__((vector_size (64))); +typedef int32_t v16si __attribute__((vector_size (64))); +typedef int16_t v32hi __attribute__((vector_size (64))); +typedef int8_t v64qi __attribute__((vector_size (64))); +typedef double v8df __attribute__((vector_size (64))); +typedef float v16sf __attribute__((vector_size (64))); +typedef _Float16 v32hf __attribute__((vector_size (64))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v8di, 0) \ + T (int64_t, v8di, 1) \ + T (int64_t, v8di, 2) \ + T (int64_t, v8di, 7) \ + T (int32_t, v16si, 0) \ + T (int32_t, v16si, 1) \ + T (int32_t, v16si, 3) \ + T (int32_t, v16si, 4) \ + T (int32_t, v16si, 15) \ + T (int16_t, v32hi, 0) \ + T (int16_t, v32hi, 1) \ + T (int16_t, v32hi, 7) \ + T (int16_t, v32hi, 8) \ + T (int16_t, v32hi, 31) \ + T (int8_t, v64qi, 0) \ + T (int8_t, v64qi, 1) \ + T (int8_t, v64qi, 15) \ + T (int8_t, v64qi, 16) \ + T (int8_t, v64qi, 63) \ + T (double, v8df, 0) \ + T (double, v8df, 1) \ + T (double, v8df, 2) \ + T (double, v8df, 7) \ + 
T (float, v16sf, 0) \ + T (float, v16sf, 1) \ + T (float, v16sf, 3) \ + T (float, v16sf, 4) \ + T (float, v16sf, 15) \ + T (_Float16, v32hf, 0) \ + T (_Float16, v32hf, 1) \ + T (_Float16, v32hf, 7) \ + T (_Float16, v32hf, 8) \ + T (_Float16, v32hf, 31) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c new file mode 100644 index 00000000000..87ac2351768 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_3.c @@ -0,0 +1,124 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=1024 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v16di __attribute__((vector_size (128))); +typedef int32_t v32si __attribute__((vector_size (128))); +typedef int16_t v64hi __attribute__((vector_size (128))); +typedef int8_t v128qi __attribute__((vector_size (128))); +typedef double v16df __attribute__((vector_size (128))); +typedef float v32sf __attribute__((vector_size (128))); 
+typedef _Float16 v64hf __attribute__((vector_size (128))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v16di, 0) \ + T (int64_t, v16di, 1) \ + T (int64_t, v16di, 2) \ + T (int64_t, v16di, 7) \ + T (int64_t, v16di, 8) \ + T (int64_t, v16di, 9) \ + T (int64_t, v16di, 15) \ + T (int32_t, v32si, 0) \ + T (int32_t, v32si, 1) \ + T (int32_t, v32si, 3) \ + T (int32_t, v32si, 4) \ + T (int32_t, v32si, 15) \ + T (int32_t, v32si, 16) \ + T (int32_t, v32si, 21) \ + T (int32_t, v32si, 31) \ + T (int16_t, v64hi, 0) \ + T (int16_t, v64hi, 1) \ + T (int16_t, v64hi, 7) \ + T (int16_t, v64hi, 8) \ + T (int16_t, v64hi, 31) \ + T (int16_t, v64hi, 32) \ + T (int16_t, v64hi, 47) \ + T (int16_t, v64hi, 63) \ + T (int8_t, v128qi, 0) \ + T (int8_t, v128qi, 1) \ + T (int8_t, v128qi, 15) \ + T (int8_t, v128qi, 16) \ + T (int8_t, v128qi, 63) \ + T (int8_t, v128qi, 64) \ + T (int8_t, v128qi, 100) \ + T (int8_t, v128qi, 127) \ + T (double, v16df, 0) \ + T (double, v16df, 1) \ + T (double, v16df, 2) \ + T (double, v16df, 7) \ + T (double, v16df, 8) \ + T (double, v16df, 9) \ + T (double, v16df, 15) \ + T (float, v32sf, 0) \ + T (float, v32sf, 1) \ + T (float, v32sf, 3) \ + T (float, v32sf, 4) \ + T (float, v32sf, 15) \ + T (float, v32sf, 16) \ + T (float, v32sf, 21) \ + T (float, v32sf, 31) \ + T (_Float16, v64hf, 0) \ + T (_Float16, v64hf, 1) \ + T (_Float16, v64hf, 7) \ + T (_Float16, v64hf, 8) \ + T (_Float16, v64hf, 31) \ + T (_Float16, v64hf, 32) \ + T (_Float16, v64hf, 47) \ + T (_Float16, v64hf, 63) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, 
v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c new file mode 100644 index 00000000000..e61a2fa94e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_extract_4.c @@ -0,0 +1,135 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=2048 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v32di __attribute__((vector_size (256))); +typedef int32_t v64si __attribute__((vector_size (256))); +typedef int16_t v128hi __attribute__((vector_size (256))); +typedef int8_t v256qi __attribute__((vector_size (256))); +typedef double v32df __attribute__((vector_size (256))); +typedef float v64sf __attribute__((vector_size (256))); +typedef _Float16 v128hf __attribute__((vector_size (256))); + +#define EXTRACT(ELT_TYPE, TYPE, INDEX) \ + ELT_TYPE permute_##TYPE##_##INDEX (void) \ + { \ + TYPE values; \ + asm ("" : "=w" (values)); \ + return values[INDEX]; \ + } + +#define TEST_ALL(T) \ + T (int64_t, v32di, 0) \ + T (int64_t, v32di, 1) \ + T (int64_t, v32di, 2) \ + T (int64_t, v32di, 7) \ + T (int64_t, v32di, 8) \ + T (int64_t, v32di, 9) \ + T (int64_t, v32di, 15) \ + T (int64_t, v32di, 31) \ + T (int32_t, v64si, 0) \ + T (int32_t, v64si, 1) \ + T (int32_t, v64si, 3) \ + T (int32_t, v64si, 4) \ + T (int32_t, v64si, 15) \ + T (int32_t, v64si, 16) \ + T (int32_t, v64si, 21) \ + T (int32_t, v64si, 31) \ + T (int32_t, v64si, 63) \ + T (int16_t, v128hi, 0) \ + T (int16_t, v128hi, 1) \ + T (int16_t, v128hi, 7) \ + T (int16_t, v128hi, 8) \ + T (int16_t, v128hi, 31) \ + T (int16_t, v128hi, 32) \ + T (int16_t, v128hi, 47) \ + T (int16_t, v128hi, 63) \ + T (int16_t, v128hi, 127) \ + T (int8_t, v256qi, 0) \ + T (int8_t, v256qi, 1) \ + T (int8_t, v256qi, 15) \ + T (int8_t, v256qi, 16) \ + T (int8_t, v256qi, 63) \ + T (int8_t, v256qi, 64) \ + T (int8_t, v256qi, 100) \ + T (int8_t, v256qi, 127) \ + T (int8_t, v256qi, 255) \ + T (double, v32df, 0) \ + T (double, v32df, 1) \ + T (double, v32df, 2) \ + T (double, v32df, 7) \ + T (double, v32df, 8) \ 
+ T (double, v32df, 9) \ + T (double, v32df, 15) \ + T (double, v32df, 31) \ + T (float, v64sf, 0) \ + T (float, v64sf, 1) \ + T (float, v64sf, 3) \ + T (float, v64sf, 4) \ + T (float, v64sf, 15) \ + T (float, v64sf, 16) \ + T (float, v64sf, 21) \ + T (float, v64sf, 31) \ + T (float, v64sf, 63) \ + T (_Float16, v128hf, 0) \ + T (_Float16, v128hf, 1) \ + T (_Float16, v128hf, 7) \ + T (_Float16, v128hf, 8) \ + T (_Float16, v128hf, 31) \ + T (_Float16, v128hf, 32) \ + T (_Float16, v128hf, 47) \ + T (_Float16, v128hf, 63) \ + T (_Float16, v128hf, 127) + +TEST_ALL (EXTRACT) + +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], 
z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ + +/* Also used to move the result of a non-Advanced SIMD extract. 
*/ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #120\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #124\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #127\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c new file mode 100644 index 00000000000..33e1db5d1df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fabs_1.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (_Float16, fabsf) +DO_OPS (float, fabsf) +DO_OPS (double, fabs) + 
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c new file mode 100644 index 00000000000..7c5f6ddc996 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vfcvtz_16 (int16_t *dst, _Float16 *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (int16_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_32 (int32_t *dst, float *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (int32_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_64 (int64_t *dst, double *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (int64_t) src1[i]; +} + +/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c new file mode 100644 index 00000000000..48968f8ce19 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_signed_1_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_fcvtz_signed_1.c" + +#define ARRAY_SIZE 81 + +#define VAL1 ((i * 17) - 180) +#define VAL2 ((i * 237.86) - (29 * 237.86)) +#define VAL3 ((double) ((i * 0xf8dfef2f) - (11 * 0xf8dfef2f))) 
+ +int __attribute__ ((optimize (1))) +main (void) +{ + static int16_t array_dest16[ARRAY_SIZE]; + static int32_t array_dest32[ARRAY_SIZE]; + static int64_t array_dest64[ARRAY_SIZE]; + + _Float16 array_source16[ARRAY_SIZE]; + float array_source32[ARRAY_SIZE]; + double array_source64[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); + } + + vfcvtz_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (int16_t) VAL1) + __builtin_abort (); + + vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest32[i] != (int32_t) VAL2) + __builtin_abort (); + + vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest64[i] != (int64_t) VAL3) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c new file mode 100644 index 00000000000..2691cf0bc17 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +vfcvtz_16 (uint16_t *dst, _Float16 *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (uint16_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_32 (uint32_t *dst, float *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (uint32_t) src1[i]; +} + +void __attribute__ ((noinline, noclone)) +vfcvtz_64 (uint64_t *dst, double *src1, int size) +{ + for (int i = 0; i < size; i++) + dst[i] = (uint64_t) src1[i]; +} + +/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, 
p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c new file mode 100644 index 00000000000..9c1be7c8a6f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fcvtz_unsigned_1_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_fcvtz_unsigned_1.c" + +#define ARRAY_SIZE 75 + +#define VAL1 (i * 19) +#define VAL2 (i * 2574.33) +#define VAL3 ((double) (i * 0xff23efef)) + +int __attribute__ ((optimize (1))) +main (void) +{ + static uint16_t array_dest16[ARRAY_SIZE]; + static uint32_t array_dest32[ARRAY_SIZE]; + static uint64_t array_dest64[ARRAY_SIZE]; + + _Float16 array_source16[ARRAY_SIZE]; + float array_source32[ARRAY_SIZE]; + double array_source64[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source16[i] = VAL1; + array_source32[i] = VAL2; + array_source64[i] = VAL3; + asm volatile ("" ::: "memory"); + } + + vfcvtz_16 (array_dest16, array_source16, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest16[i] != (uint16_t) VAL1) + __builtin_abort (); + + vfcvtz_32 (array_dest32, array_source32, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest32[i] != (uint32_t) VAL2) + __builtin_abort (); + + vfcvtz_64 (array_dest64, array_source64, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest64[i] != (uint64_t) VAL3) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c new file mode 100644 index 00000000000..b193726ea0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdiv_1.c @@ -0,0 +1,43 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + 
+typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vdiv_##TYPE (TYPE *x, TYPE y) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src asm("z2"); \ + dst = *x; \ + src = y; \ + asm volatile ("" :: "w" (dst), "w" (src)); \ + dst = dst / src; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} \ +void vdivr_##TYPE (TYPE *x, TYPE y) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src asm("z2"); \ + dst = *x; \ + src = y; \ + asm volatile ("" :: "w" (dst), "w" (src)); \ + dst = src / dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c new file mode 100644 index 00000000000..148e0f9bd89 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1.c @@ -0,0 +1,62 @@ +/* { dg-do assemble } */ +/* -fno-tree-loop-distribute-patterns prevents conversion to memset. 
*/ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns --save-temps" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) + +#define DEF_SET_IMM(TYPE, IMM, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +set_##TYPE##_##SUFFIX (TYPE *a) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + a[i] = IMM; \ +} + +#define DEF_SET_IMM_FP(IMM, SUFFIX) \ + DEF_SET_IMM (float, IMM, SUFFIX) \ + DEF_SET_IMM (double, IMM, SUFFIX) + +/* Valid. */ +DEF_SET_IMM_FP (1, imm1) +DEF_SET_IMM_FP (0x1.1p0, imm1p0) +DEF_SET_IMM_FP (0x1.fp0, immfp0) +DEF_SET_IMM_FP (0x1.1p4, imm1p4) +DEF_SET_IMM_FP (0x1.1p-3, imm1pm3) +DEF_SET_IMM_FP (0x1.fp4, immfp4) +DEF_SET_IMM_FP (0x1.fp-3, immfpm3) + +/* Should use MOV instead. */ +DEF_SET_IMM_FP (0, imm0) + +/* Invalid. */ +DEF_SET_IMM_FP (0x1.1fp0, imm1fp0) +DEF_SET_IMM_FP (0x1.1p5, imm1p5) +DEF_SET_IMM_FP (0x1.1p-4, imm1pm4) +DEF_SET_IMM_FP (0x1.1fp5, imm1fp5) +DEF_SET_IMM_FP (0x1.1fp-4, imm1fpm4) + +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s,} 7 } } */ + +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.0625e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.9375e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.7e\+1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #1.328125e-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #3.1e\+1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2.421875e-1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d,} 7 } } */ + +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.0625e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tfmov\tz[0-9]+\.d, #1.9375e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.7e\+1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #1.328125e-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3.1e\+1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2.421875e-1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c new file mode 100644 index 00000000000..f4cb1a0bf71 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fdup_1_run.c @@ -0,0 +1,38 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include "sve_fdup_1.c" + +#define TEST_SET_IMM(TYPE,IMM,SUFFIX) \ + { \ + TYPE v[NUM_ELEMS (TYPE)]; \ + set_##TYPE##_##SUFFIX (v); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ ) \ + if (v[i] != IMM) \ + __builtin_abort (); \ + } + +#define TEST_SET_IMM_FP(IMM, SUFFIX) \ + TEST_SET_IMM (float, IMM, SUFFIX) \ + TEST_SET_IMM (double, IMM, SUFFIX) + +int __attribute__ ((optimize (1))) +main (int argc, char **argv) +{ + TEST_SET_IMM_FP (1, imm1) + TEST_SET_IMM_FP (0x1.1p0, imm1p0) + TEST_SET_IMM_FP (0x1.fp0, immfp0) + TEST_SET_IMM_FP (0x1.1p4, imm1p4) + TEST_SET_IMM_FP (0x1.1p-3, imm1pm3) + TEST_SET_IMM_FP (0x1.fp4, immfp4) + TEST_SET_IMM_FP (0x1.fp-3, immfpm3) + + TEST_SET_IMM_FP (0, imm0) + TEST_SET_IMM_FP (0x1.1fp0, imm1fp0) + TEST_SET_IMM_FP (0x1.1p5, imm1p5) + TEST_SET_IMM_FP (0x1.1p-4, imm1pm4) + TEST_SET_IMM_FP (0x1.1fp5, imm1fp5) + TEST_SET_IMM_FP (0x1.1fp-4, imm1fpm4) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c new file mode 100644 index 00000000000..2b1dbb087bc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmad_1.c @@ -0,0 +1,29 @@ +/* { dg-do 
assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c new file mode 100644 index 00000000000..d5e4df266bf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmla_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tfmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c new file mode 100644 index 00000000000..c3f2c8a5823 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmls_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c new file mode 100644 index 00000000000..30e1895c8d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmsb_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + 
register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c new file mode 100644 index 00000000000..3b648297963 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fmul_1.c @@ -0,0 +1,46 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#define DO_REGREG_OPS(TYPE, OP, NAME) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP src[i]; \ +} + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP (TYPE) VALUE; \ +} + +#define DO_ARITH_OPS(TYPE, OP, NAME) \ + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## 0point5); \ + DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ + DO_IMMEDIATE_OPS (5, TYPE, OP, NAME ## 5); \ + DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minus0point5); \ + DO_IMMEDIATE_OPS (-2, TYPE, OP, NAME ## minus2); + +DO_ARITH_OPS (_Float16, *, mul) +DO_ARITH_OPS (float, *, mul) +DO_ARITH_OPS (double, *, mul) + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not 
{\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #5} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #5} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #5} } } */ +/* { dg-final { scan-assembler-not {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c new file mode 100644 index 00000000000..7af81662fb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fneg_1.c @@ -0,0 +1,17 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE) \ +void vneg_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = -src[i]; \ +} + +DO_OPS (_Float16) +DO_OPS (float) +DO_OPS (double) + +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c new file 
mode 100644 index 00000000000..84a95187314 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmad_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-dst * src1) - src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c new file mode 100644 index 00000000000..dcc4811f1d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmla_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (-src1 * src2) - dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) 
+DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c new file mode 100644 index 00000000000..7a89399f4be --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmls_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (src1 * src2) - dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c new file mode 100644 index 00000000000..6c95b0abc8e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fnmsb_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options " -O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +typedef _Float16 v16hf __attribute__((vector_size(32))); +typedef float v8sf __attribute__((vector_size(32))); +typedef double v4df 
__attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmad##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) - src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v16hf) +DO_OP (v8sf) +DO_OP (v4df) + +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c new file mode 100644 index 00000000000..06fea806038 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fp_arith_1.c @@ -0,0 +1,71 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#define DO_REGREG_OPS(TYPE, OP, NAME) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP src[i]; \ +} + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP (TYPE) VALUE; \ +} + +#define DO_ARITH_OPS(TYPE, OP, NAME) \ + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (1, TYPE, OP, NAME ## 1); \ + DO_IMMEDIATE_OPS (0.5, TYPE, OP, NAME ## pointfive); \ + DO_IMMEDIATE_OPS (2, TYPE, OP, NAME ## 2); \ + DO_IMMEDIATE_OPS (2.5, TYPE, OP, NAME ## twopoint5); \ + DO_IMMEDIATE_OPS (-0.5, TYPE, OP, NAME ## minuspointfive); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); + +DO_ARITH_OPS (_Float16, +, add) +DO_ARITH_OPS (float, +, add) +DO_ARITH_OPS (double, +, add) + +DO_ARITH_OPS (_Float16, -, minus) +DO_ARITH_OPS (float, -, minus) 
+DO_ARITH_OPS (double, -, minus) + +/* No specific count because it's valid to use fadd or fsub for the + out-of-range constants. */ +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #-} } } */ + +/* { dg-final { scan-assembler {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */ + +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #-} } } */ + +/* { 
dg-final { scan-assembler {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */ +/* { dg-final { scan-assembler-not {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */ + +/* { dg-final { scan-assembler {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #-} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c new file mode 100644 index 00000000000..bad2be4ed33 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frinta_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, roundf) +DO_OPS (double, round) + +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c new file mode 100644 index 00000000000..4407fb56caa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frinti_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize 
-march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, nearbyintf) +DO_OPS (double, nearbyint) + +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c new file mode 100644 index 00000000000..01bf65db343 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintm_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, floorf) +DO_OPS (double, floor) + +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c new file mode 100644 index 00000000000..f8b2c08ac63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintp_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, ceilf) +DO_OPS (double, ceil) + +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c new file mode 100644 index 00000000000..a062295011a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintx_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, rintf) +DO_OPS (double, rint) + +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c b/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c new file mode 100644 index 00000000000..207814f5506 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_frintz_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, truncf) +DO_OPS (double, trunc) + +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c new file mode 100644 index 00000000000..55081c3bf4f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fsqrt_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -fno-math-errno --save-temps" } */ + +#define DO_OPS(TYPE, OP) \ +void \ +vsqrt_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) 
\ + dst[i] = __builtin_##OP (src[i]); \ +} + +DO_OPS (float, sqrtf) +DO_OPS (double, sqrt) + +/* { dg-final { scan-assembler-times {\tfsqrt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsqrt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c new file mode 100644 index 00000000000..b252ef059ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_fsubr_1.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ +void vsubrarithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = (TYPE) VALUE - dst[i]; \ +} + +#define DO_ARITH_OPS(TYPE) \ + DO_IMMEDIATE_OPS (0, TYPE, 0); \ + DO_IMMEDIATE_OPS (1, TYPE, 1); \ + DO_IMMEDIATE_OPS (0.5, TYPE, 0point5); \ + DO_IMMEDIATE_OPS (2, TYPE, 2); \ + DO_IMMEDIATE_OPS (3.5, TYPE, 3point5); + +DO_ARITH_OPS (_Float16) +DO_ARITH_OPS (float) +DO_ARITH_OPS (double) + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #3} } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2} } } */ +/* { dg-final { scan-assembler-not 
{\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #3} } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2} } } */ +/* { dg-final { scan-assembler-not {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #3} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c new file mode 100644 index 00000000000..096a969d756 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_1.c @@ -0,0 +1,72 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, unsigned long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, unsigned int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, unsigned short * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, unsigned char * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load64s(signed long * restrict dst, signed long * restrict src, unsigned long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load32s(signed int * restrict dst, signed int * restrict src, unsigned int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = 
src[indices[i]]; +} + +void gather_load16s(signed short * restrict dst, signed short * restrict src, unsigned short * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load8s(signed char * restrict dst, signed char * restrict src, unsigned char * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load_double(double * restrict dst, double * restrict src, unsigned long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load_float(float * restrict dst, float * restrict src, unsigned int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ +/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 3 } } */ +/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */ +/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c new file mode 100644 index 00000000000..b31b4508114 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_10.c @@ -0,0 +1,72 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +void gather_load64(unsigned long * restrict dst, unsigned long * restrict src, signed long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load32(unsigned int * restrict dst, unsigned int * restrict src, signed int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load16(unsigned short * restrict dst, unsigned short * restrict src, signed short * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load8(unsigned char * restrict dst, unsigned char * restrict src, signed char * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load64s(signed long * restrict dst, signed long * restrict src, signed long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load32s(signed int * restrict dst, signed int * restrict src, signed int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load16s(signed short * restrict dst, signed short * restrict src, signed short * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load8s(signed char * restrict dst, signed char * restrict src, signed char * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load_double(double * restrict dst, double * restrict src, signed long * restrict indices, int count) +{ + for (int i=0; i<count; i++) + dst[i] = src[indices[i]]; +} + +void gather_load_float(float * restrict dst, float * restrict src, signed int * restrict indices, int count) +{ + for (int i=0; i<count; i++) + 
dst[i] = src[indices[i]]; +} + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ +/* { dg-final { scan-assembler-not "ld1d\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 3\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 2\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 3 } } */ +/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw 1\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1h\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 1\\\]" } } */ +/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, sxtw\\\ ]" } } */ +/* { dg-final { scan-assembler-not "ld1b\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\ ]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c new file mode 100644 index 00000000000..d8a85396eb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_11.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +void +f (double *restrict a, double *restrict b, short *c, int *d, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[c[i] + d[i]]; +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d,} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+.d,} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c new file mode 100644 
index 00000000000..9b62b12904e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_2.c @@ -0,0 +1,72 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +void gather_loadu64_s16(unsigned long * restrict dst, unsigned long * restrict src, + short int * restrict indices, short n) +{ + for (short i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadu64_u16(unsigned long * restrict dst, unsigned long * restrict src, + unsigned short int * restrict indices, short n) +{ + for (short i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadd_s16(double * restrict dst, double * restrict src, + short * restrict indices, short n) +{ + for (short i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadd_u16(double * restrict dst, double * restrict src, + unsigned short * restrict indices, short n) +{ + for (short i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadu64_s32(unsigned long * restrict dst, unsigned long * restrict src, + int * restrict indices, int n) +{ + for (int i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadu64_u32(unsigned long * restrict dst, unsigned long * restrict src, + unsigned int * restrict indices, int n) +{ + for (int i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadd_s32(double * restrict dst, double * restrict src, + int * restrict indices, int n) +{ + for (int i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +void gather_loadd_u32(double * restrict dst, double * restrict src, + unsigned int * restrict indices, int n) +{ + for (int i=0; i<n; i++) + dst[i] = src[indices[i]]; +} + +/* At present we only use unpacks for the 32/64 combinations. 
*/ +/* { dg-final { scan-assembler-times {\tpunpklo\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\tpunpkhi\tp[0-9]+\.h, p[0-9]+\.b} 4 } } */ + +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]} 24 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c new file mode 100644 index 00000000000..0a8f802ce56 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3.c @@ -0,0 +1,45 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ + +#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ + DATA_TYPE __attribute__ ((noinline)) \ + NAME (char *data, INDEX_TYPE *indices, int n) \ + { \ + DATA_TYPE sum = 0; \ + for (int i = 0; i < n; ++i) \ + sum += *(DATA_TYPE *) (data + indices[i]); \ + return sum; \ + } + +#define TEST32(NAME, DATA_TYPE) \ + TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \ + TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \ + TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \ + TEST_LOOP (NAME ## _s8, DATA_TYPE, signed char) \ + TEST_LOOP (NAME ## _s16, DATA_TYPE, signed short) \ + TEST_LOOP (NAME ## _s32, DATA_TYPE, signed int) + +#define TEST64(NAME, DATA_TYPE) \ + TEST_LOOP 
(NAME ## _s8, DATA_TYPE, signed char) \ + TEST_LOOP (NAME ## _u8, DATA_TYPE, unsigned char) \ + TEST_LOOP (NAME ## _s16, DATA_TYPE, short) \ + TEST_LOOP (NAME ## _u16, DATA_TYPE, unsigned short) \ + TEST_LOOP (NAME ## _s32, DATA_TYPE, int) \ + TEST_LOOP (NAME ## _u32, DATA_TYPE, unsigned int) \ + TEST_LOOP (NAME ## _s64, DATA_TYPE, long) \ + TEST_LOOP (NAME ## _u64, DATA_TYPE, unsigned long) + +TEST32 (f_s32, int) +TEST32 (f_u32, unsigned int) +TEST32 (f_f32, float) + +TEST64 (f_s64, long) +TEST64 (f_u64, unsigned long) +TEST64 (f_f64, double) + +/* (4 + 2 + 1) * 3 */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]} 21 } } */ +/* (4 + 2 + 1) * 3 */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 21 } } */ +/* (8 + 8 + 4 + 4 + 2 + 2 + 1 + 1) * 3 */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 90 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c new file mode 100644 index 00000000000..baa90d5d5fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_3_run.c @@ -0,0 +1,41 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_gather_load_3.c" + +extern void abort (void); + +#define N 57 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ + { \ + INDEX_TYPE indices[N]; \ + DATA_TYPE data[N * 2]; \ + for (int i = 0; i < N * 2; ++i) \ + data[i] = (i / 2) * 4 + i % 2; \ + DATA_TYPE sum = 0; \ + for (int i = 0; i < N; ++i) \ + { \ + INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \ + j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \ + sum += data[j / sizeof (DATA_TYPE)]; \ + indices[i] = j; \ + } \ + DATA_TYPE res = NAME ((char *) data, indices, N); \ + if (res != sum) \ + abort (); \ + } + +int __attribute__ 
((optimize (1))) +main () +{ + TEST32 (f_s32, int) + TEST32 (f_u32, unsigned int) + TEST32 (f_f32, float) + + TEST64 (f_s64, long) + TEST64 (f_u64, unsigned long) + TEST64 (f_f64, double) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c new file mode 100644 index 00000000000..4d0da987d30 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ + +#define TEST_LOOP(NAME, TYPE) \ + TYPE __attribute__ ((noinline)) \ + NAME (TYPE **indices, int n) \ + { \ + TYPE sum = 0; \ + for (int i = 0; i < n; ++i) \ + sum += *indices[i]; \ + return sum; \ + } + +TEST_LOOP (f_s64, long) +TEST_LOOP (f_u64, unsigned long) +TEST_LOOP (f_f64, double) + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c new file mode 100644 index 00000000000..00d3dea6acd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_4_run.c @@ -0,0 +1,35 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_gather_load_4.c" + +extern void abort (void); + +#define N 57 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE *ptrs[N]; \ + TYPE data[N * 2]; \ + for (int i = 0; i < N * 2; ++i) \ + data[i] = (i / 2) * 4 + i % 2; \ + TYPE sum = 0; \ + for (int i = 0; i < N; ++i) \ + { \ + ptrs[i] = &data[i * 3 / 2]; \ + sum += *ptrs[i]; \ + } \ + TYPE res = NAME (ptrs, N); \ + if (res != sum) \ + abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_LOOP (f_s64, long) + TEST_LOOP (f_u64, unsigned long) + TEST_LOOP (f_f64, double) + return 0; +} diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c new file mode 100644 index 00000000000..0aaf9553a11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5.c @@ -0,0 +1,113 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE)\ +void gather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + dst[i] = src[i * STRIDE];\ +} + +#define GATHER_LOAD2(OBJTYPE,STRIDETYPE)\ +void gather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + dst[i] = src[i * stride];\ +} + +#define GATHER_LOAD3(OBJTYPE,STRIDETYPE)\ +void gather_load3s5##OBJTYPE##STRIDETYPE\ + (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\ + OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\ + STRIDETYPE count)\ +{\ + const STRIDETYPE STRIDE = 5;\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + d1[i] = src[0 + (i * STRIDE)];\ + d2[i] = src[1 + (i * STRIDE)];\ + d3[i] = src[2 + (i * STRIDE)];\ + d4[i] = src[3 + (i * STRIDE)];\ + d5[i] = src[4 + (i * STRIDE)];\ + }\ +} + +#define GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE)\ +void gather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + *dst = *src;\ + dst += 1;\ + src += STRIDE;\ + }\ +} + +#define GATHER_LOAD5(OBJTYPE,STRIDETYPE)\ +void gather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + *dst = *src;\ + dst += 1;\ + src += stride;\ + }\ +} + +GATHER_LOAD1 (double, long, 5) +GATHER_LOAD1 (double, long, 8) 
+GATHER_LOAD1 (double, long, 21) +GATHER_LOAD1 (double, long, 1009) + +GATHER_LOAD1 (float, int, 5) +GATHER_LOAD1 (float, int, 8) +GATHER_LOAD1 (float, int, 21) +GATHER_LOAD1 (float, int, 1009) + +GATHER_LOAD2 (double, long) +GATHER_LOAD2 (float, int) + +GATHER_LOAD3 (double, long) +GATHER_LOAD3 (float, int) + +GATHER_LOAD4 (double, long, 5) + +/* NOTE: We can't vectorize GATHER_LOAD4 (float, int, 5) because we can't prove + that the offsets used for the gather load won't overflow. */ + +GATHER_LOAD5 (double, long) +GATHER_LOAD5 (float, int) + +/* Widened forms. */ +GATHER_LOAD1 (double, int, 5) +GATHER_LOAD1 (double, int, 8) +GATHER_LOAD1 (double, short, 5) +GATHER_LOAD1 (double, short, 8) + +GATHER_LOAD1 (float, short, 5) +GATHER_LOAD1 (float, short, 8) + +GATHER_LOAD2 (double, int) +GATHER_LOAD2 (float, short) + +GATHER_LOAD4 (double, int, 5) +GATHER_LOAD4 (float, short, 5) + +GATHER_LOAD5 (double, int) + +/* TODO: We generate abysmal code for this even though we don't use gathers. */ +/*GATHER_LOAD5 (float, short)*/ + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c new file mode 100644 index 00000000000..7608f9b569b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_5_run.c @@ -0,0 +1,161 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <unistd.h> + +extern void abort (void); +extern void *memset(void *, int, size_t); + +#include "sve_gather_load_5.c" + +#define NUM_DST_ELEMS 13 +#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS 
* STRIDE) + +#define TEST_GATHER_LOAD_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + src[i * STRIDE] = i;\ + FUN##OBJTYPE##STRIDETYPE##STRIDE \ + (dst, src, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + if (dst[i] != i)\ + abort ();\ +} + +#define TEST_GATHER_LOAD_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + memset (real_dst, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + src[i * STRIDE] = i;\ + FUN##OBJTYPE##STRIDETYPE \ + (dst, src, STRIDE, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + if (dst[i] != i)\ + abort ();\ +} + +#define TEST_GATHER_LOAD1(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_GATHER_LOAD_COMMON1 (gather_load1, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_GATHER_LOAD2(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_GATHER_LOAD_COMMON2 (gather_load2, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_GATHER_LOAD3(OBJTYPE,STRIDETYPE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\ + 
__attribute__((aligned (32)));\ + OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\ + memset (real_dst1, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst2, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst3, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst4, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst5, 0, (1 + NUM_DST_ELEMS) * sizeof (OBJTYPE));\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst1 = &real_dst1[1];\ + OBJTYPE *dst2 = &real_dst2[1];\ + OBJTYPE *dst3 = &real_dst3[1];\ + OBJTYPE *dst4 = &real_dst4[1];\ + OBJTYPE *dst5 = &real_dst5[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\ + src[i] = i;\ + gather_load3s5##OBJTYPE##STRIDETYPE \ + (dst1, dst2, dst3, dst4, dst5, src, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + {\ + STRIDETYPE base = i * 5;\ + if (dst1[i] != base)\ + abort ();\ + if (dst2[i] != (base + 1))\ + abort ();\ + if (dst3[i] != (base + 2))\ + abort ();\ + if (dst4[i] != (base + 3))\ + abort ();\ + if (dst5[i] != (base + 4))\ + abort ();\ + }\ +} + +#define TEST_GATHER_LOAD4(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_GATHER_LOAD_COMMON1 (gather_load4, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_GATHER_LOAD5(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_GATHER_LOAD_COMMON2 (gather_load5, OBJTYPE, STRIDETYPE, STRIDE) + +int __attribute__ ((optimize (1))) +main () +{ + TEST_GATHER_LOAD1 (double, long, 5); + TEST_GATHER_LOAD1 (double, long, 8); + TEST_GATHER_LOAD1 (double, long, 21); + + TEST_GATHER_LOAD1 (float, int, 5); + TEST_GATHER_LOAD1 (float, int, 8); + TEST_GATHER_LOAD1 (float, int, 21); + + TEST_GATHER_LOAD2 (double, long, 5); + TEST_GATHER_LOAD2 (double, long, 8); + TEST_GATHER_LOAD2 (double, long, 21); + + TEST_GATHER_LOAD2 (float, int, 5); + TEST_GATHER_LOAD2 (float, int, 8); + TEST_GATHER_LOAD2 (float, int, 21); + + TEST_GATHER_LOAD3 (double, long); + 
TEST_GATHER_LOAD3 (float, int); + + TEST_GATHER_LOAD4 (double, long, 5); + + TEST_GATHER_LOAD5 (double, long, 5); + TEST_GATHER_LOAD5 (float, int, 5); + + /* Widened forms. */ + TEST_GATHER_LOAD1 (double, int, 5) + TEST_GATHER_LOAD1 (double, int, 8) + TEST_GATHER_LOAD1 (double, short, 5) + TEST_GATHER_LOAD1 (double, short, 8) + + TEST_GATHER_LOAD1 (float, short, 5) + TEST_GATHER_LOAD1 (float, short, 8) + + TEST_GATHER_LOAD2 (double, int, 5); + TEST_GATHER_LOAD2 (double, int, 8); + TEST_GATHER_LOAD2 (double, int, 21); + + TEST_GATHER_LOAD2 (float, short, 5); + TEST_GATHER_LOAD2 (float, short, 8); + TEST_GATHER_LOAD2 (float, short, 21); + + TEST_GATHER_LOAD4 (double, int, 5); + TEST_GATHER_LOAD4 (float, short, 5); + + TEST_GATHER_LOAD5 (double, int, 5); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c new file mode 100644 index 00000000000..68b0b4d59b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void +foo (double *__restrict y, double *__restrict x1, + double *__restrict x2, int m) +{ + for (int i = 0; i < 256; ++i) + y[i * m] = x1[i * m] + x2[i * m]; +} + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c new file mode 100644 index 00000000000..788aeb08df2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_7.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void +foo (double *x, int m) +{ + for (int i = 0; i < 256; ++i) + x[i * m] += x[i * m]; +} + +/* { dg-final { 
scan-assembler-times {\tcbz\tw1,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c new file mode 100644 index 00000000000..0c0cf73be55 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_8.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void +f (int *__restrict a, + int *__restrict b, + int *__restrict c, + int count) +{ + for (int i = 0; i < count; ++i) + a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 2] + + c[i * 5] + c[i * 5 + 3]); +} + +/* There must be a final scalar iteration because b[(count - 1) * 4 + 3] + is not accessed by the original code. */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */ +/* { dg-final { scan-assembler {\tldr\t} } } */ +/* { dg-final { scan-assembler {\tstr\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c new file mode 100644 index 00000000000..dad798c8106 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_gather_load_9.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void +f (int *__restrict a, + int *__restrict b, + int *__restrict c, + int count) +{ + for (int i = 0; i < count; ++i) + a[i] = (b[i * 4] + b[i * 4 + 1] + b[i * 4 + 3] + + c[i * 5] + c[i * 5 + 3]); +} + +/* There's no need for a scalar tail here. 
*/ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.*}} 1 } } */ +/* { dg-final { scan-assembler-not {\tldr\t} } } */ +/* { dg-final { scan-assembler-not {\tstr\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve_index_1.c new file mode 100644 index 00000000000..09e65cf0fc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_1.c @@ -0,0 +1,94 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) + +#define DEF_LOOP(TYPE, BASE, STEP, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +loop_##TYPE##_##SUFFIX (TYPE *a) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + a[i] = (BASE) + i * (STEP); \ +} + +#define TEST_ALL_UNSIGNED_TYPES(T, BASE, STEP, SUFFIX) \ + T (uint8_t, BASE, STEP, SUFFIX) \ + T (uint16_t, BASE, STEP, SUFFIX) \ + T (uint32_t, BASE, STEP, SUFFIX) \ + T (uint64_t, BASE, STEP, SUFFIX) + +#define TEST_ALL_SIGNED_TYPES(T, BASE, STEP, SUFFIX) \ + T (int8_t, BASE, STEP, SUFFIX) \ + T (int16_t, BASE, STEP, SUFFIX) \ + T (int32_t, BASE, STEP, SUFFIX) \ + T (int64_t, BASE, STEP, SUFFIX) + +/* Immediate loops. */ +#define TEST_IMMEDIATE(T) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 1, b0s1) \ + TEST_ALL_SIGNED_TYPES (T, 0, 1, b0s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 15, b0s15) \ + TEST_ALL_SIGNED_TYPES (T, 0, 15, b0s15) \ + TEST_ALL_SIGNED_TYPES (T, 0, -1, b0sm1) \ + TEST_ALL_SIGNED_TYPES (T, 0, -16, b0sm16) \ + TEST_ALL_SIGNED_TYPES (T, -16, 1, bm16s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 15, 1, b15s1) \ + TEST_ALL_SIGNED_TYPES (T, 15, 1, b15s1) + +/* Non-immediate loops. 
*/ +#define TEST_NONIMMEDIATE(T) \ + TEST_ALL_UNSIGNED_TYPES (T, 0, 16, b0s16) \ + TEST_ALL_SIGNED_TYPES (T, 0, 16, b0s16) \ + TEST_ALL_SIGNED_TYPES (T, 0, -17, b0sm17) \ + TEST_ALL_SIGNED_TYPES (T, -17, 1, bm17s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 16, 1, b16s1) \ + TEST_ALL_SIGNED_TYPES (T, 16, 1, b16s1) \ + TEST_ALL_UNSIGNED_TYPES (T, 16, 16, b16s16) \ + TEST_ALL_SIGNED_TYPES (T, 16, 16, b16s16) \ + TEST_ALL_SIGNED_TYPES (T, -17, -17, bm17sm17) + +#define TEST_ALL(T) TEST_IMMEDIATE (T) TEST_NONIMMEDIATE (T) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #-16, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #15, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, #0, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, w[0-9]+\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #-16, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #15, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, #0, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, 
w[0-9]+\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #-16, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #15, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, #0, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #-1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #-16, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #15, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, #0, x[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c new file mode 100644 index 00000000000..7492ed3f756 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_index_1_run.c @@ -0,0 +1,20 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include "sve_index_1.c" + +#define TEST_LOOP(TYPE, BASE, STEP, SUFFIX) \ + { \ + TYPE array[NUM_ELEMS 
(TYPE)] = {}; \ + loop_##TYPE##_##SUFFIX (array); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (array[i] != (TYPE) (BASE + i * STEP)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c new file mode 100644 index 00000000000..949449cde9f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define SIZE 15*8+3 + +#define INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE) \ +void set_##SIGNED##_##TYPE##_##ITERTYPE (SIGNED TYPE *__restrict out, \ + SIGNED TYPE *__restrict in) \ +{ \ + SIGNED ITERTYPE i; \ + for (i = 0; i < SIZE; i++) \ + { \ + out[i] = in[i]; \ + } \ +} \ +void set_##SIGNED##_##TYPE##_##ITERTYPE##_var (SIGNED TYPE *__restrict out, \ + SIGNED TYPE *__restrict in, \ + SIGNED ITERTYPE n) \ +{\ + SIGNED ITERTYPE i;\ + for (i = 0; i < n; i++)\ + {\ + out[i] = in[i];\ + }\ +} + +#define INDEX_OFFSET_TEST(SIGNED, TYPE)\ + INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \ + INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \ + INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \ + INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long) + +INDEX_OFFSET_TEST (signed, long) +INDEX_OFFSET_TEST (unsigned, long) +INDEX_OFFSET_TEST (signed, int) +INDEX_OFFSET_TEST (unsigned, int) +INDEX_OFFSET_TEST (signed, short) +INDEX_OFFSET_TEST (unsigned, short) +INDEX_OFFSET_TEST (signed, char) +INDEX_OFFSET_TEST (unsigned, char) + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 3\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 
2\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 2\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1h\\tz\[0-9\]+.h, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1h\\tz\[0-9\]+.h, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+, lsl 1\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "ld1b\\tz\[0-9\]+.b, p\[0-9\]+/z, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ +/* { dg-final { scan-assembler-times "st1b\\tz\[0-9\]+.b, p\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" 16 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c new file mode 100644 index 00000000000..d6b2646798c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_indexoffset_1_run.c @@ -0,0 +1,48 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-std=c99 -ftree-vectorize -O2 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_indexoffset_1.c" + +#include <string.h> + +#define CALL_INDEX_OFFSET_TEST_1(SIGNED, TYPE, ITERTYPE)\ +{\ + SIGNED TYPE out[SIZE + 1];\ + SIGNED TYPE in1[SIZE + 1];\ + SIGNED TYPE in2[SIZE + 1];\ + for (int i = 0; i < SIZE + 1; ++i)\ + {\ + in1[i] = (i * 4) ^ i;\ + in2[i] = (i * 2) ^ i;\ + }\ + out[SIZE] = 42;\ + set_##SIGNED##_##TYPE##_##ITERTYPE (out, in1); \ + if (0 != memcmp (out, in1, SIZE * sizeof (TYPE)))\ + return 1;\ + set_##SIGNED##_##TYPE##_##ITERTYPE##_var (out, in2, SIZE); \ + if (0 != memcmp (out, in2, SIZE * sizeof (TYPE)))\ + return 1;\ + if (out[SIZE] != 42)\ + return 1;\ +} + +#define CALL_INDEX_OFFSET_TEST(SIGNED, TYPE)\ + CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, char) \ + CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, short) \ + CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, int) \ + CALL_INDEX_OFFSET_TEST_1 (SIGNED, TYPE, long) + 
+int +main (void) +{ + CALL_INDEX_OFFSET_TEST (signed, long) + CALL_INDEX_OFFSET_TEST (unsigned, long) + CALL_INDEX_OFFSET_TEST (signed, int) + CALL_INDEX_OFFSET_TEST (unsigned, int) + CALL_INDEX_OFFSET_TEST (signed, short) + CALL_INDEX_OFFSET_TEST (unsigned, short) + CALL_INDEX_OFFSET_TEST (signed, char) + CALL_INDEX_OFFSET_TEST (unsigned, char) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C new file mode 100644 index 00000000000..4c196684364 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.C @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) + +#define DEF_LOAD_BROADCAST(TYPE)\ +void set_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\ +{\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++)\ + a[i] = *b;\ +}\ + +#define DEF_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\ +void set_##TYPE##SUFFIX (TYPE *__restrict__ a)\ +{\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++)\ + a[i] = IMM;\ +}\ + +/* --- VALID --- */ + +DEF_LOAD_BROADCAST (int8_t) +DEF_LOAD_BROADCAST (int16_t) +DEF_LOAD_BROADCAST (int32_t) +DEF_LOAD_BROADCAST (int64_t) + +DEF_LOAD_BROADCAST_IMM (int16_t, 129, imm_129) +DEF_LOAD_BROADCAST_IMM (int32_t, 129, imm_129) +DEF_LOAD_BROADCAST_IMM (int64_t, 129, imm_129) + +DEF_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130) +DEF_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130) +DEF_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130) + +DEF_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234) +DEF_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234) +DEF_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234) + +DEF_LOAD_BROADCAST_IMM (int16_t, 0xFEDC, imm_0xFEDC) +DEF_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC) +DEF_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC) + +DEF_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678) 
+DEF_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678) + +DEF_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678) +DEF_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678) + +DEF_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), imm_FEBA716B12371765) + +/* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */ +/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c new file mode 100644 index 00000000000..314c2b89624 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1.c @@ -0,0 +1,53 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +#define DUP4(X) X, X, X, X +#define DUP8(X) DUP4 (X), DUP4 (X) +#define DUP16(X) DUP8 (X), DUP8 (X) +#define DUP32(X) DUP16 (X), DUP16 (X) + +typedef uint8_t vuint8_t __attribute__ ((vector_size (32))); +typedef uint16_t vuint16_t __attribute__ ((vector_size (32))); +typedef uint32_t vuint32_t __attribute__ ((vector_size (32))); +typedef uint64_t vuint64_t __attribute__ ((vector_size (32))); + +#define TEST(TYPE, NAME, INIT) \ + void \ + NAME##_##TYPE (TYPE *dest, __typeof__(dest[0][0]) *ptr) \ + { \ + TYPE x = { INIT }; \ + *dest = x; \ + } + +#define TEST_GROUP(TYPE, NAME, DUP) \ + TEST (TYPE, NAME##_m1, DUP (ptr[-1])) \ + TEST (TYPE, NAME##_0, DUP (ptr[0])) \ + TEST (TYPE, NAME##_63, DUP (ptr[63])) \ + TEST (TYPE, NAME##_64, DUP (ptr[64])) + +TEST_GROUP (vuint8_t, t8, DUP32) +TEST_GROUP (vuint16_t, t16, DUP16) +TEST_GROUP (vuint32_t, t32, DUP8) +TEST_GROUP (vuint64_t, t64, DUP4) + +/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler 
{\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 63\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rb\tz[0-9]+\.b, p[0-7]/z, \[x1, 64\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 126\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rh\tz[0-9]+\.h, p[0-7]/z, \[x1, 128\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 252\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rw\tz[0-9]+\.s, p[0-7]/z, \[x1, 256\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, -1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1\]\n} } } */ +/* { dg-final { scan-assembler {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 504\]\n} } } */ +/* { dg-final { scan-assembler-not {\tld1rd\tz[0-9]+\.d, p[0-7]/z, \[x1, 512\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C new file mode 100644 index 00000000000..8e954f3e32c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_ld1r_1_run.C @@ -0,0 +1,64 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-std=c++11 -O3 -fno-inline -march=armv8-a+sve -fno-tree-loop-distribute-patterns" } */ + +#include "sve_ld1r_1.C" + +#include <stdlib.h> +#include <stdio.h> + +#define TEST_LOAD_BROADCAST(TYPE,IMM)\ + {\ + TYPE v[NUM_ELEMS (TYPE)];\ + TYPE temp = 0;\ + set_##TYPE (v, IMM);\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + temp += v[i];\ + result += temp;\ + }\ + +#define 
TEST_LOAD_BROADCAST_IMM(TYPE,IMM,SUFFIX)\ + {\ + TYPE v[NUM_ELEMS (TYPE)];\ + TYPE temp = 0;\ + set_##TYPE##SUFFIX (v);\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + temp += v[i];\ + result += temp;\ + }\ + +int main (int argc, char **argv) +{ + long long int result = 0; + + TEST_LOAD_BROADCAST_IMM (int16_t, 129, imm_129) + TEST_LOAD_BROADCAST_IMM (int32_t, 129, imm_129) + TEST_LOAD_BROADCAST_IMM (int64_t, 129, imm_129) + + TEST_LOAD_BROADCAST_IMM (int16_t, -130, imm_m130) + TEST_LOAD_BROADCAST_IMM (int32_t, -130, imm_m130) + TEST_LOAD_BROADCAST_IMM (int64_t, -130, imm_m130) + + TEST_LOAD_BROADCAST_IMM (int16_t, 0x1234, imm_0x1234) + TEST_LOAD_BROADCAST_IMM (int32_t, 0x1234, imm_0x1234) + TEST_LOAD_BROADCAST_IMM (int64_t, 0x1234, imm_0x1234) + + TEST_LOAD_BROADCAST_IMM (int16_t, int16_t (0xFEDC), imm_0xFEDC) + TEST_LOAD_BROADCAST_IMM (int32_t, 0xFEDC, imm_0xFEDC) + TEST_LOAD_BROADCAST_IMM (int64_t, 0xFEDC, imm_0xFEDC) + + TEST_LOAD_BROADCAST_IMM (int32_t, 0x12345678, imm_0x12345678) + TEST_LOAD_BROADCAST_IMM (int64_t, 0x12345678, imm_0x12345678) + + TEST_LOAD_BROADCAST_IMM (int32_t, 0xF2345678, imm_0xF2345678) + TEST_LOAD_BROADCAST_IMM (int64_t, 0xF2345678, imm_0xF2345678) + + TEST_LOAD_BROADCAST_IMM (int64_t, int64_t (0xFEBA716B12371765), + imm_FEBA716B12371765) + + if (result != int64_t (6717319005707226880)) + { + fprintf (stderr, "result = %lld\n", result); + abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c new file mode 100644 index 00000000000..2d92708fbd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +int +liveloop (int start, int n, int *x) +{ + int i = start; + int j; + + for (j = 0; j < n; ++j) + { + i += 1; + x[j] = i; + } + return i; +} + +/* { dg-final { 
scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Using a fully-masked loop" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c new file mode 100644 index 00000000000..99f0be353aa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_1_run.c @@ -0,0 +1,29 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +extern void abort(void); +#include <string.h> + +#include "sve_live_1.c" + +#define MAX 62 +#define START 27 + +int main (void) +{ + int a[MAX]; + int i; + + memset (a, 0, MAX*sizeof (int)); + + int ret = liveloop (START, MAX, a); + + if (ret != 89) + abort (); + + for (i=0; i<MAX; i++) + { + if (a[i] != i+START+1) + abort (); + } +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2.c new file mode 100644 index 00000000000..06d95fa8ea6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_2.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-tree-scev-cprop -march=armv8-a+sve --save-temps -fdump-tree-vect-details" } */ + +int +liveloop (int start, int n, int * __restrict__ x, char * __restrict__ y) +{ + int i = start; + int j; + + for (j = 0; j < n; ++j) + { + i += 1; + x[j] = y[j] + 1; + } + return i; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Can't use a fully-masked loop because ncopies is greater than 1" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c new file mode 100644 index 00000000000..e7924e020cb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_live_2_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +extern void abort(void); +#include <string.h> +#include <stdio.h> + +#include "sve_live_2.c" + +#define MAX 193 +#define START 84 + +int main (void) +{ + int a[MAX]; + char b[MAX]; + int i; + + memset (a, 0, MAX*sizeof (int)); + memset (b, 23, MAX*sizeof (char)); + + int ret = liveloop (START, MAX, a, b); + + if (ret != 277) + abort (); + + for (i=0; i<MAX; i++) + { + if (a[i] != 24) + abort (); + } +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c new file mode 100644 index 00000000000..0bc757907cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_1.c @@ -0,0 +1,79 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__ ((vector_size (32))); +typedef int32_t v8si __attribute__ ((vector_size (32))); +typedef int16_t v16hi __attribute__ ((vector_size (32))); +typedef int8_t v32qi __attribute__ ((vector_size (32))); + +#define TEST_TYPE(TYPE) \ + void sve_load_##TYPE##_neg9 (TYPE *a) \ + { \ + register TYPE x asm ("z0") = a[-9]; \ + asm volatile ("" :: "w" (x)); \ + } \ + \ + void sve_load_##TYPE##_neg8 (TYPE *a) \ + { \ + register TYPE x asm ("z0") = a[-8]; \ + asm volatile ("" :: "w" (x)); \ + } \ + \ + void sve_load_##TYPE##_0 (TYPE *a) \ + { \ + register TYPE x asm ("z0") = a[0]; \ + asm volatile ("" :: "w" (x)); \ + } \ + \ + void sve_load_##TYPE##_unaligned (TYPE *a) \ + { \ + register TYPE x asm ("z0") = *(TYPE *) ((char *) a + 16); \ + asm volatile ("" :: "w" (x)); \ + } \ + \ + void sve_load_##TYPE##_7 (TYPE *a) \ + { \ + register TYPE x asm ("z0") = a[7]; \ + asm volatile ("" :: "w" (x)); \ + } \ + \ + void sve_load_##TYPE##_8 (TYPE *a) \ + { \ + register TYPE x asm ("z0") = a[8]; \ + asm volatile ("" :: "w" (x)); \ + } + +TEST_TYPE (v4di) +TEST_TYPE (v8si) +TEST_TYPE (v16hi) +TEST_TYPE (v32qi) + +/* { dg-final { scan-assembler-times {\tsub\tx[0-9]+, x0, #288\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 256\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ 
+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1d\tz0\.d, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1w\tz0\.s, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1h\tz0\.h, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, #-8, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, #7, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tld1b\tz0\.b, p[0-7]/z, \[x0, #8, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c new file mode 100644 index 00000000000..a0ced0d9be4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_2.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize 
-march=armv8-a+sve -save-temps -msve-vector-bits=256" } */ + +void +f (unsigned int *restrict a, unsigned char *restrict b, int n) +{ + for (int i = 0; i < n; ++i) + a[i] += b[i]; +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #2, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #3, mul vl\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c new file mode 100644 index 00000000000..00731d995c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_const_offset_3.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -save-temps" } */ + +void +f (unsigned int *restrict a, unsigned char *restrict b, int n) +{ + for (int i = 0; i < n; ++i) + a[i] += b[i]; +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #1, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #2, mul vl\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, #3, mul vl\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c new file mode 100644 index 00000000000..9163702db1d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_load_scalar_offset_1.c @@ -0,0 +1,70 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__ ((vector_size (32))); +typedef 
int32_t v8si __attribute__ ((vector_size (32))); +typedef int16_t v16hi __attribute__ ((vector_size (32))); +typedef int8_t v32qi __attribute__ ((vector_size (32))); + +void sve_load_64_u_lsl (uint64_t *a) +{ + register unsigned long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v4di *)&a[i])); +} + +void sve_load_64_s_lsl (int64_t *a) +{ + register long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v4di *)&a[i])); +} + +void sve_load_32_u_lsl (uint32_t *a) +{ + register unsigned long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v8si *)&a[i])); +} + +void sve_load_32_s_lsl (int32_t *a) +{ + register long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v8si *)&a[i])); +} + +void sve_load_16_z_lsl (uint16_t *a) +{ + register unsigned long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v16hi *)&a[i])); +} + +void sve_load_16_s_lsl (int16_t *a) +{ + register long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v16hi *)&a[i])); +} + +void sve_load_8_z (uint8_t *a) +{ + register unsigned long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v32qi *)&a[i])); +} + +void sve_load_8_s (int8_t *a) +{ + register long i asm("x1"); + asm volatile ("" : "=r" (i)); + asm volatile ("" :: "w" (*(v32qi *)&a[i])); +} + +/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0, x1, lsl 3\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0, x1, lsl 2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0, x1, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0, x1\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c b/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c new file mode 100644 index 00000000000..aa39adf85f8 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_logical_1.c @@ -0,0 +1,277 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#define DO_CONSTANT(VALUE, TYPE, OP, NAME) \ +void vlogical_imm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = dst[i] OP VALUE; \ +} + +#define DO_LOGICAL_OPS_BRIEF(TYPE, OP, NAME) \ + DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ + DO_CONSTANT (2, TYPE, OP, NAME ## 2) \ + DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ + DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ + DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ + DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ + DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ + DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ + DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ + DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) + +#define DO_LOGICAL_OPS(TYPE, OP, NAME) \ + DO_CONSTANT (1, TYPE, OP, NAME ## 1) \ + DO_CONSTANT (2, TYPE, OP, NAME ## 2) \ + DO_CONSTANT (3, TYPE, OP, NAME ## 3) \ + DO_CONSTANT (4, TYPE, OP, NAME ## 4) \ + DO_CONSTANT (5, TYPE, OP, NAME ## 5) \ + DO_CONSTANT (6, TYPE, OP, NAME ## 6) \ + DO_CONSTANT (7, TYPE, OP, NAME ## 7) \ + DO_CONSTANT (8, TYPE, OP, NAME ## 8) \ + DO_CONSTANT (9, TYPE, OP, NAME ## 9) \ + DO_CONSTANT (10, TYPE, OP, NAME ## 10) \ + DO_CONSTANT (11, TYPE, OP, NAME ## 11) \ + DO_CONSTANT (12, TYPE, OP, NAME ## 12) \ + DO_CONSTANT (13, TYPE, OP, NAME ## 13) \ + DO_CONSTANT (14, TYPE, OP, NAME ## 14) \ + DO_CONSTANT (15, TYPE, OP, NAME ## 15) \ + DO_CONSTANT (16, TYPE, OP, NAME ## 16) \ + DO_CONSTANT (17, TYPE, OP, NAME ## 17) \ + DO_CONSTANT (18, TYPE, OP, NAME ## 18) \ + DO_CONSTANT (19, TYPE, OP, NAME ## 19) \ + DO_CONSTANT (20, TYPE, OP, NAME ## 20) \ + DO_CONSTANT (21, TYPE, OP, NAME ## 21) \ + DO_CONSTANT (22, TYPE, OP, NAME ## 22) \ + DO_CONSTANT (23, TYPE, OP, NAME ## 23) \ + DO_CONSTANT (24, TYPE, OP, NAME ## 24) \ + DO_CONSTANT (25, TYPE, OP, NAME ## 25) \ + DO_CONSTANT (26, TYPE, OP, NAME ## 26) \ + DO_CONSTANT (27, TYPE, OP, NAME ## 
27) \ + DO_CONSTANT (28, TYPE, OP, NAME ## 28) \ + DO_CONSTANT (29, TYPE, OP, NAME ## 29) \ + DO_CONSTANT (30, TYPE, OP, NAME ## 30) \ + DO_CONSTANT (31, TYPE, OP, NAME ## 31) \ + DO_CONSTANT (32, TYPE, OP, NAME ## 32) \ + DO_CONSTANT (33, TYPE, OP, NAME ## 33) \ + DO_CONSTANT (34, TYPE, OP, NAME ## 34) \ + DO_CONSTANT (35, TYPE, OP, NAME ## 35) \ + DO_CONSTANT (252, TYPE, OP, NAME ## 252) \ + DO_CONSTANT (253, TYPE, OP, NAME ## 253) \ + DO_CONSTANT (254, TYPE, OP, NAME ## 254) \ + DO_CONSTANT (255, TYPE, OP, NAME ## 255) \ + DO_CONSTANT (256, TYPE, OP, NAME ## 256) \ + DO_CONSTANT (257, TYPE, OP, NAME ## 257) \ + DO_CONSTANT (65535, TYPE, OP, NAME ## 65535) \ + DO_CONSTANT (65536, TYPE, OP, NAME ## 65536) \ + DO_CONSTANT (65537, TYPE, OP, NAME ## 65537) \ + DO_CONSTANT (2147483646, TYPE, OP, NAME ## 2147483646) \ + DO_CONSTANT (2147483647, TYPE, OP, NAME ## 2147483647) \ + DO_CONSTANT (2147483648, TYPE, OP, NAME ## 2147483648) \ + DO_CONSTANT (-1, TYPE, OP, NAME ## minus1) \ + DO_CONSTANT (-2, TYPE, OP, NAME ## minus2) \ + DO_CONSTANT (-3, TYPE, OP, NAME ## minus3) \ + DO_CONSTANT (-4, TYPE, OP, NAME ## minus4) \ + DO_CONSTANT (-5, TYPE, OP, NAME ## minus5) \ + DO_CONSTANT (-6, TYPE, OP, NAME ## minus6) \ + DO_CONSTANT (-7, TYPE, OP, NAME ## minus7) \ + DO_CONSTANT (-8, TYPE, OP, NAME ## minus8) \ + DO_CONSTANT (-9, TYPE, OP, NAME ## minus9) + +DO_LOGICAL_OPS_BRIEF (char, &, and) +DO_LOGICAL_OPS_BRIEF (long, &, and) + +DO_LOGICAL_OPS (int, &, and) +DO_LOGICAL_OPS (int, |, or) +DO_LOGICAL_OPS (int, ^, xor) + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0x8\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */ +/* { dg-final { scan-assembler-not 
{\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1a\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfb\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */ +/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.b, z[0-9]+\.b, #0xfa\n} } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 28 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, 
z[0-9]+\.s, #0x1a\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */ +/* { dg-final { 
scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */ +/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 22 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x4\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x5\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x8\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x9\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xa\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xb\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xd\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, 
z[0-9]+\.s, #0xe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xf\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x11\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x12\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x13\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x14\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x15\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x16\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x17\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x18\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x19\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1a\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1b\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1c\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1d\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1e\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x1f\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x20\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x21\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x22\n} } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x23\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfc\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfd\n} } } */ +/* { 
dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x100\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x101\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x10001\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7ffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xffffffff\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffe\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffd\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffc\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffb\n} 1 } } */ +/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffffa\n} } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff9\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, z[0-9]+\.s, #0xfffffff7\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 22 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c new file mode 100644 index 00000000000..5546cefe686 
--- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +void __attribute__((noinline, noclone)) +vadd (int *dst, int *op1, int *op2, int count) +{ + for (int i = 0; i < count; ++i) + dst[i] = op1[i] + op2[i]; +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7],} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c new file mode 100644 index 00000000000..c7d0352e273 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_1_run.c @@ -0,0 +1,23 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_loop_add_1.c" + +#define ELEMS 10 + +int __attribute__ ((optimize (1))) +main (void) +{ + int in1[ELEMS] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int in2[ELEMS] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + int out[ELEMS]; + int check[ELEMS] = { 3, 5, 7, 9, 11, 13, 15, 17, 19, 21 }; + + vadd (out, in1, in2, ELEMS); + + for (int i = 0; i < ELEMS; ++i) + if (out[i] != check[i]) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_2.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_2.c new file mode 100644 index 00000000000..379090e6251 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -O3 -march=armv8-a+sve" } */ + +void +foo (int *__restrict a, int *__restrict b) +{ + for (int i = 0; i < 512; ++i) + a[i] += b[i]; +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 
2\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_3.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_3.c new file mode 100644 index 00000000000..7e6c5e08c1b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99 -O3 -march=armv8-a+sve" } */ + +void +f (int *__restrict a, + int *__restrict b, + int *__restrict c, + int *__restrict d, + int *__restrict e, + int *__restrict f, + int *__restrict g, + int *__restrict h, + int count) +{ + for (int i = 0; i < count; ++i) + a[i] = b[i] + c[i] + d[i] + e[i] + f[i] + g[i] + h[i]; +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 7 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c new file mode 100644 index 00000000000..35ab3b3c641 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define LOOP(TYPE, NAME, STEP) \ + __attribute__((noinline, noclone)) \ + void \ + test_##TYPE##_##NAME (TYPE *dst, TYPE base, int count) \ + { \ + for (int i = 0; i < count; ++i, base += STEP) \ + dst[i] += base; \ + } + +#define TEST_TYPE(T, TYPE) \ + T (TYPE, m17, -17) \ + T (TYPE, m16, -16) \ + T (TYPE, m15, -15) \ + T (TYPE, m1, -1) \ + T (TYPE, 1, 1) \ + T (TYPE, 15, 15) \ + T (TYPE, 16, 16) \ + T (TYPE, 17, 17) + +#define TEST_ALL(T) \ + TEST_TYPE (T, int8_t) \ + TEST_TYPE (T, int16_t) \ + TEST_TYPE (T, int32_t) \ + TEST_TYPE (T, int64_t) + +TEST_ALL (LOOP) + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #-15\n} 1 } } */ 
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]+/z, \[x[0-9]+, x[0-9]+\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]+, \[x[0-9]+, x[0-9]+\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tincb\tx[0-9]+\n} 8 } } */ + +/* { dg-final { scan-assembler-not {\tdecb\tz[0-9]+\.b} } } */ +/* We don't need to increment the vector IV for steps -16 and 16, since the + increment is always a multiple of 256. */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 14 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 1\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tincb\tx[0-9]+\n} 8 } } */ + +/* { dg-final { scan-assembler-times {\tdech\tz[0-9]+\.h, all, mul #16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdech\tz[0-9]+\.h, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdech\tz[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tinch\tz[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tinch\tz[0-9]+\.h, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tinch\tz[0-9]+\.h, all, mul #16\n} 1 } } */ 
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 10 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */ + +/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincw\tz[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 10 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, 
x[0-9]+, lsl 3\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */ + +/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincd\tz[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincd\tz[0-9]+\.d, all, mul #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tincd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 10 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c new file mode 100644 index 00000000000..2d11a221e93 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_loop_add_4.c" + +#define N 131 +#define BASE 41 + +#define TEST_LOOP(TYPE, NAME, STEP) \ + { \ + TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + a[i] = i * i + i % 5; \ + test_##TYPE##_##NAME (a, BASE, N); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * i + i % 5 + BASE + i * STEP; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (TEST_LOOP) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c new file mode 100644 index 00000000000..a27bde6f9da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include "sve_loop_add_4.c" + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #-16\n} 1 { xfail *-*-* } } } */ +/* { dg-final { 
scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #-15\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, #15\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.b, w[0-9]+, w[0-9]+\n} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]+/z, \[x[0-9]+, x[0-9]+\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]+, \[x[0-9]+, x[0-9]+\]} 8 } } */ + +/* The induction vector is invariant for steps of -16 and 16. */ +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, #} } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, #} 6 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 8 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-16\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-15\n} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, w[0-9]+\n} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 1\]} 8 } } */ + +/* The (-)17 * 16 is out of range. 
*/ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, #} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, #} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 10 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, #} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, #} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 8 } } */ + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #-16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ + +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, #} 4 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, #} 4 } } */ +/* { dg-final { 
scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5_run.c new file mode 100644 index 00000000000..3a471b593d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_5_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_loop_add_4_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c new file mode 100644 index 00000000000..ccb20b4191f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mad_1.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (dst * src1) + src2; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) + +/* { dg-final { scan-assembler-times {\tmad\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tmad\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c new file mode 100644 index 00000000000..4d47bce14fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define INVALID_INDEX(TYPE) ((TYPE) 107) +#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE)) +#define ODD(VAL) (VAL & 0x1) + +/* TODO: This is a bit ugly for floating point types as it involves FP<>INT + conversions, but I can't find another way of auto-vectorizing the code to + make use of SVE gather instructions. */ +#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ +void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\ + LOOKUPTYPE *__restrict lookup,\ + INDEXTYPE *__restrict index, int n)\ +{\ + int i;\ + for (i = 0; i < n; ++i)\ + {\ + INDEXTYPE x = index[i];\ + if (IS_VALID_INDEX (INDEXTYPE, x))\ + x = lookup[x];\ + out[i] = x;\ + }\ +}\ + +DEF_MASK_GATHER_LOAD (int32_t, int32_t, int32_t) +DEF_MASK_GATHER_LOAD (int64_t, int64_t, int64_t) +DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t) +DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t) +DEF_MASK_GATHER_LOAD (float, float, int32_t) +DEF_MASK_GATHER_LOAD (double, double, int64_t) + +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 3 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c new file mode 100644 index 00000000000..89ccf3e35a4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_1_run.c @@ -0,0 +1,72 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_mask_gather_load_1.c" + +#include <stdio.h> + +extern void abort (); + +/* TODO: Support widening forms of gather loads and test them here. */ + +#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) + +#define INDEX_VEC_INIT(INDEXTYPE)\ + INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (INDEXTYPE)];\ + +#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ + LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (LOOKUPTYPE)];\ + OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\ + {\ + int i;\ + for (i = 0; i < NUM_ELEMS (INDEXTYPE); i++)\ + {\ + lookup_##LOOKUPTYPE [i] = i * 2;\ + index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\ + }\ + } + +#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ + fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\ + (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\ + NUM_ELEMS (INDEXTYPE));\ + {\ + int i;\ + for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ + {\ + if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\ + break;\ + else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (INDEXTYPE))\ + break;\ + }\ + if (i < NUM_ELEMS (OUTTYPE))\ + {\ + fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\ + i, (int) out_##OUTTYPE[i]);\ + abort ();\ + }\ + } + +int main() +{ + INDEX_VEC_INIT (int32_t) + INDEX_VEC_INIT (int64_t) + INDEX_VEC_INIT (uint32_t) + INDEX_VEC_INIT (uint64_t) + + VEC_INIT (int32_t, int32_t, int32_t) + VEC_INIT (int64_t, int64_t, int64_t) + VEC_INIT (uint32_t, uint32_t, uint32_t) + VEC_INIT (uint64_t, uint64_t, uint64_t) + VEC_INIT (float, float, int32_t) + VEC_INIT (double, double, int64_t) + + TEST_MASK_GATHER_LOAD (int32_t, int32_t, int32_t) + TEST_MASK_GATHER_LOAD (int64_t, int64_t, int64_t) + TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint32_t) + TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint64_t) + TEST_MASK_GATHER_LOAD (float, 
float, int32_t) + TEST_MASK_GATHER_LOAD (double, double, int64_t) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c new file mode 100644 index 00000000000..48db58ffefd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2.c @@ -0,0 +1,60 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (4 * (32 / sizeof (TYPE))) +#define INVALID_INDEX(TYPE) ((TYPE) 107) +#define IS_VALID_INDEX(TYPE, VAL) (VAL < INVALID_INDEX (TYPE)) + +/* TODO: This is a bit ugly for floating point types as it involves FP<>INT + conversions, but I can't find another way of auto-vectorizing the code to + make use of SVE gather instructions. */ +#define DEF_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ +void fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE (OUTTYPE *__restrict out,\ + LOOKUPTYPE *__restrict lookup,\ + INDEXTYPE *__restrict index, INDEXTYPE n)\ +{\ + INDEXTYPE i;\ + for (i = 0; i < n; ++i)\ + {\ + LOOKUPTYPE x = index[i];\ + if (IS_VALID_INDEX (LOOKUPTYPE, x))\ + x = lookup[x];\ + out[i] = x;\ + }\ +}\ + +DEF_MASK_GATHER_LOAD (int32_t, int32_t, int8_t) +DEF_MASK_GATHER_LOAD (int64_t, int64_t, int8_t) +DEF_MASK_GATHER_LOAD (int32_t, int32_t, int16_t) +DEF_MASK_GATHER_LOAD (int64_t, int64_t, int16_t) +DEF_MASK_GATHER_LOAD (int64_t, int64_t, int32_t) +DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t) +DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t) +DEF_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t) +DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t) +DEF_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t) + +/* At present we only use predicate unpacks when the index type is + half the size of the result type. 
*/ +/* { dg-final { scan-assembler-times "\tpunpklo\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */ +/* { dg-final { scan-assembler-times "\tpunpkhi\\tp\[0-9\]+\.h, p\[0-9\]+\.b" 4 } } */ + +/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ +/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ +/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ +/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ +/* { dg-final { scan-assembler-times "\tsunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ +/* { dg-final { scan-assembler-times "\tsunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ + +/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ +/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.h, z\[0-9\]+\.b" 2 } } */ +/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ +/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.s, z\[0-9\]+\.h" 6 } } */ +/* { dg-final { scan-assembler-times "\tuunpklo\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ +/* { dg-final { scan-assembler-times "\tuunpkhi\\tz\[0-9\]+\.d, z\[0-9\]+\.s" 7 } } */ + +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, uxtw 2\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d, lsl 3\\\]" 28 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c new file mode 100644 index 00000000000..c5280546206 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_2_run.c @@ -0,0 +1,98 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline 
-march=armv8-a+sve" } */ + +#include "sve_mask_gather_load_2.c" + +#include <stdio.h> + +extern void abort (); + +#define ODD(VAL) (VAL & 0x1) +#define INDEX_VEC_INIT(INDEXTYPE)\ + INDEXTYPE index_##INDEXTYPE[NUM_ELEMS (int8_t)];\ + +#define VEC_INIT(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ + LOOKUPTYPE lookup_##LOOKUPTYPE[NUM_ELEMS (OUTTYPE)];\ + OUTTYPE out_##OUTTYPE[NUM_ELEMS (OUTTYPE)];\ + {\ + int i;\ + for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ + {\ + lookup_##LOOKUPTYPE [i] = i * 2;\ + index_##INDEXTYPE [i] = ODD (i) ? i : INVALID_INDEX (INDEXTYPE);\ + }\ + } + +#define TEST_MASK_GATHER_LOAD(OUTTYPE,LOOKUPTYPE,INDEXTYPE)\ + fun_##OUTTYPE##LOOKUPTYPE##INDEXTYPE\ + (out_##OUTTYPE, lookup_##LOOKUPTYPE, index_##INDEXTYPE,\ + NUM_ELEMS (OUTTYPE));\ + {\ + int i;\ + for (i = 0; i < NUM_ELEMS (OUTTYPE); i++)\ + {\ + if (ODD (i) && out_##OUTTYPE[i] != (i * 2))\ + break;\ + else if (!ODD (i) && out_##OUTTYPE[i] != INVALID_INDEX (OUTTYPE))\ + break;\ + }\ + if (i < NUM_ELEMS (OUTTYPE))\ + {\ + fprintf (stderr, "out_" # OUTTYPE "[%d] = %d\n",\ + i, (int) out_##OUTTYPE[i]);\ + abort ();\ + }\ + } + +int main() +{ + INDEX_VEC_INIT (int8_t) + INDEX_VEC_INIT (int16_t) + INDEX_VEC_INIT (int32_t) + INDEX_VEC_INIT (uint8_t) + INDEX_VEC_INIT (uint16_t) + INDEX_VEC_INIT (uint32_t) + + { + VEC_INIT (int32_t, int32_t, int8_t) + TEST_MASK_GATHER_LOAD (int32_t, int32_t, int8_t) + } + { + VEC_INIT (int64_t, int64_t, int8_t) + TEST_MASK_GATHER_LOAD (int64_t, int64_t, int8_t) + } + { + VEC_INIT (int32_t, int32_t, int16_t) + TEST_MASK_GATHER_LOAD (int32_t, int32_t, int16_t) + } + { + VEC_INIT (int64_t, int64_t, int16_t) + TEST_MASK_GATHER_LOAD (int64_t, int64_t, int16_t) + } + { + VEC_INIT (int64_t, int64_t, int32_t) + TEST_MASK_GATHER_LOAD (int64_t, int64_t, int32_t) + } + { + VEC_INIT (uint32_t, uint32_t, uint8_t) + TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint8_t) + } + { + VEC_INIT (uint64_t, uint64_t, uint8_t) + TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint8_t) + } + { + VEC_INIT 
(uint32_t, uint32_t, uint16_t) + TEST_MASK_GATHER_LOAD (uint32_t, uint32_t, uint16_t) + } + { + VEC_INIT (uint64_t, uint64_t, uint16_t) + TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint16_t) + } + { + VEC_INIT (uint64_t, uint64_t, uint32_t) + TEST_MASK_GATHER_LOAD (uint64_t, uint64_t, uint32_t) + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c new file mode 100644 index 00000000000..2965760e058 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ + +#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ + DATA_TYPE __attribute__ ((noinline)) \ + NAME (char *data, INDEX_TYPE *indices, signed char n) \ + { \ + DATA_TYPE sum = 0; \ + for (signed char i = 0; i < n; ++i) \ + { \ + INDEX_TYPE index = indices[i]; \ + sum += (index & 16 ? *(DATA_TYPE *) (data + index) : 1); \ + } \ + return sum; \ + } + +TEST_LOOP (f_s32, int, unsigned int) +TEST_LOOP (f_u32, unsigned int, unsigned int) +TEST_LOOP (f_f32, float, unsigned int) + +TEST_LOOP (f_s64_s64, long, long) +TEST_LOOP (f_s64_u64, long, unsigned long) +TEST_LOOP (f_u64_s64, unsigned long, long) +TEST_LOOP (f_u64_u64, unsigned long, unsigned long) +TEST_LOOP (f_f64_s64, double, long) +TEST_LOOP (f_f64_u64, double, unsigned long) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c new file mode 100644 index 00000000000..aa73c81ffca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_3_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve_hw } } */ 
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_mask_gather_load_3.c" + +extern void abort (void); + +#define N 57 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, DATA_TYPE, INDEX_TYPE) \ + { \ + INDEX_TYPE indices[N]; \ + DATA_TYPE data[N * 2]; \ + for (int i = 0; i < N * 2; ++i) \ + data[i] = (i / 2) * 4 + i % 2; \ + DATA_TYPE sum = 0; \ + for (int i = 0; i < N; ++i) \ + { \ + INDEX_TYPE j = (i * 3 / 2) * sizeof (DATA_TYPE); \ + j &= (1ULL << (sizeof (INDEX_TYPE) * 8 - 1)) - 1; \ + if (j & 16) \ + sum += data[j / sizeof (DATA_TYPE)]; \ + else \ + sum += 1; \ + indices[i] = j; \ + } \ + DATA_TYPE res = NAME ((char *) data, indices, N); \ + if (res != sum) \ + abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_LOOP (f_s32, int, unsigned int) + TEST_LOOP (f_u32, unsigned int, unsigned int) + TEST_LOOP (f_f32, float, unsigned int) + + TEST_LOOP (f_s64_s64, long, long) + TEST_LOOP (f_s64_u64, long, unsigned long) + TEST_LOOP (f_u64_s64, unsigned long, long) + TEST_LOOP (f_u64_u64, unsigned long, unsigned long) + TEST_LOOP (f_f64_s64, double, long) + TEST_LOOP (f_f64_u64, double, unsigned long) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c new file mode 100644 index 00000000000..38bb5275e59 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math --save-temps" } */ + +#define TEST_LOOP(NAME, TYPE) \ + TYPE __attribute__ ((noinline)) \ + NAME (TYPE **indices, long *mask, int n) \ + { \ + TYPE sum = 0; \ + for (int i = 0; i < n; ++i) \ + sum += mask[i] ? 
*indices[i] : 1; \ + return sum; \ + } + +TEST_LOOP (f_s64, long) +TEST_LOOP (f_u64, unsigned long) +TEST_LOOP (f_f64, double) + +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c new file mode 100644 index 00000000000..8a6320a002c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_4_run.c @@ -0,0 +1,37 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_mask_gather_load_4.c" + +extern void abort (void); + +#define N 57 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE *ptrs[N]; \ + TYPE data[N * 2]; \ + long mask[N]; \ + for (int i = 0; i < N * 2; ++i) \ + data[i] = (i / 2) * 4 + i % 2; \ + TYPE sum = 0; \ + for (int i = 0; i < N; ++i) \ + { \ + mask[i] = i & 15; \ + ptrs[i] = &data[i * 3 / 2]; \ + sum += mask[i] ? 
*ptrs[i] : 1; \ + } \ + TYPE res = NAME (ptrs, mask, N); \ + if (res != sum) \ + abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_LOOP (f_s64, long) + TEST_LOOP (f_u64, unsigned long) + TEST_LOOP (f_f64, double) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c new file mode 100644 index 00000000000..abb38e40f72 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5.c @@ -0,0 +1,120 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +void mgather_load1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * STRIDE])\ + dst[i] = src[i * STRIDE];\ +} + +#define MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mgather_load2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * stride])\ + dst[i] = src[i * stride];\ +} + +#define MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mgather_load3s5##OBJTYPE##STRIDETYPE\ + (OBJTYPE * restrict d1, OBJTYPE * restrict d2, OBJTYPE * restrict d3,\ + OBJTYPE * restrict d4, OBJTYPE * restrict d5, OBJTYPE * restrict src,\ + MASKTYPE * restrict masks, STRIDETYPE count)\ +{\ + const STRIDETYPE STRIDE = 5;\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * STRIDE])\ + {\ + d1[i] = src[0 + (i * STRIDE)];\ + d2[i] = src[1 + (i * STRIDE)];\ + d3[i] = src[2 + (i * STRIDE)];\ + d4[i] = src[3 + (i * STRIDE)];\ + d5[i] = src[4 + (i * STRIDE)];\ + }\ +} + +#define MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +void mgather_load4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ 
+ OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + if (masks[i * STRIDE])\ + *dst = *src;\ + dst += 1;\ + src += STRIDE;\ + }\ +} + +#define MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mgather_load5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + if (masks[i * stride])\ + *dst = *src;\ + dst += 1;\ + src += stride;\ + }\ +} + +MASK_GATHER_LOAD1 (double, long, long, 5) +MASK_GATHER_LOAD1 (double, long, long, 8) +MASK_GATHER_LOAD1 (double, long, long, 21) +MASK_GATHER_LOAD1 (double, long, long, 1009) + +MASK_GATHER_LOAD1 (float, int, int, 5) +MASK_GATHER_LOAD1 (float, int, int, 8) +MASK_GATHER_LOAD1 (float, int, int, 21) +MASK_GATHER_LOAD1 (float, int, int, 1009) + +MASK_GATHER_LOAD2 (double, long, long) +MASK_GATHER_LOAD2 (float, int, int) + +MASK_GATHER_LOAD3 (double, long, long) +MASK_GATHER_LOAD3 (float, int, int) + +MASK_GATHER_LOAD4 (double, long, long, 5) + +/* NOTE: We can't vectorize MASK_GATHER_LOAD4 (float, int, int, 5) because we + can't prove that the offsets used for the gather load won't overflow. */ + +MASK_GATHER_LOAD5 (double, long, long) +MASK_GATHER_LOAD5 (float, int, int) + +/* Widened forms. */ +MASK_GATHER_LOAD1 (double, long, int, 5) +MASK_GATHER_LOAD1 (double, long, int, 8) +MASK_GATHER_LOAD1 (double, long, short, 5) +MASK_GATHER_LOAD1 (double, long, short, 8) + +MASK_GATHER_LOAD1 (float, int, short, 5) +MASK_GATHER_LOAD1 (float, int, short, 8) + +MASK_GATHER_LOAD2 (double, long, int) +MASK_GATHER_LOAD2 (float, int, short) + +MASK_GATHER_LOAD4 (double, long, int, 5) +MASK_GATHER_LOAD4 (float, int, short, 5) + +MASK_GATHER_LOAD5 (double, long, int) + +/* Loads including masks. 
*/ +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 34 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 20 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c new file mode 100644 index 00000000000..445c47f23ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_gather_load_5_run.c @@ -0,0 +1,177 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <unistd.h> + +extern void abort (void); +extern void *memset(void *, int, size_t); + +#include "sve_mask_gather_load_5.c" + +#define NUM_DST_ELEMS 13 +#define NUM_SRC_ELEMS(STRIDE) (NUM_DST_ELEMS * STRIDE) + +#define MASKED_VALUE 3 + +#define TEST_MASK_GATHER_LOAD_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ + real_dst[0] = 0;\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + {\ + src[i * STRIDE] = i;\ + dst[i] = MASKED_VALUE;\ + masks[i * STRIDE] = i & 0x1;\ + }\ + FUN##OBJTYPE##STRIDETYPE##STRIDE \ + (dst, src, masks, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + if (dst[i] != (masks[i * STRIDE] ? 
i : MASKED_VALUE))\ + abort ();\ +} + +#define TEST_MASK_GATHER_LOAD_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_SRC_ELEMS (STRIDE)];\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + memset (masks, 0, (NUM_SRC_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ + real_dst[0] = 0;\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + {\ + src[i * STRIDE] = i;\ + dst[i] = MASKED_VALUE;\ + masks[i * STRIDE] = i & 0x1;\ + }\ + FUN##OBJTYPE##STRIDETYPE \ + (dst, src, masks, STRIDE, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + if (dst[i] != (masks[i * STRIDE] ? i : MASKED_VALUE))\ + abort ();\ +} + +#define TEST_MASK_GATHER_LOAD1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load1, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_GATHER_LOAD2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load2, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_GATHER_LOAD3(OBJTYPE,MASKTYPE,STRIDETYPE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS (5)]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst1[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst2[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst3[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst4[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst5[1 + NUM_DST_ELEMS]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_SRC_ELEMS (5)];\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS (5)) * sizeof (OBJTYPE));\ + memset (masks, 0, (NUM_SRC_ELEMS (5)) * sizeof (MASKTYPE));\ + real_dst1[0] = real_dst2[0] = real_dst3[0] = real_dst4[0] = real_dst5[0] = 0;\ + OBJTYPE *src = 
&real_src[1];\ + OBJTYPE *dst1 = &real_dst1[1];\ + OBJTYPE *dst2 = &real_dst2[1];\ + OBJTYPE *dst3 = &real_dst3[1];\ + OBJTYPE *dst4 = &real_dst4[1];\ + OBJTYPE *dst5 = &real_dst5[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS (5); i++)\ + src[i] = i;\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + {\ + dst1[i] = MASKED_VALUE;\ + dst2[i] = MASKED_VALUE;\ + dst3[i] = MASKED_VALUE;\ + dst4[i] = MASKED_VALUE;\ + dst5[i] = MASKED_VALUE;\ + masks[i * 5] = i & 0x1;\ + }\ + mgather_load3s5##OBJTYPE##STRIDETYPE \ + (dst1, dst2, dst3, dst4, dst5, src, masks, NUM_DST_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS; i++)\ + {\ + STRIDETYPE base = i * 5;\ + if (dst1[i] != (masks[base] ? base : MASKED_VALUE))\ + abort ();\ + if (dst2[i] != (masks[base] ? (base + 1) : MASKED_VALUE))\ + abort ();\ + if (dst3[i] != (masks[base] ? (base + 2) : MASKED_VALUE))\ + abort ();\ + if (dst4[i] != (masks[base] ? (base + 3) : MASKED_VALUE))\ + abort ();\ + if (dst5[i] != (masks[base] ? (base + 4) : MASKED_VALUE))\ + abort ();\ + }\ +} + +#define TEST_MASK_GATHER_LOAD4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_GATHER_LOAD_COMMON1 (mgather_load4, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_GATHER_LOAD5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_GATHER_LOAD_COMMON2 (mgather_load5, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +int main () +{ + TEST_MASK_GATHER_LOAD1 (double, long, long, 5); + TEST_MASK_GATHER_LOAD1 (double, long, long, 8); + TEST_MASK_GATHER_LOAD1 (double, long, long, 21); + + TEST_MASK_GATHER_LOAD1 (float, int, int, 5); + TEST_MASK_GATHER_LOAD1 (float, int, int, 8); + TEST_MASK_GATHER_LOAD1 (float, int, int, 21); + + TEST_MASK_GATHER_LOAD2 (double, long, long, 5); + TEST_MASK_GATHER_LOAD2 (double, long, long, 8); + TEST_MASK_GATHER_LOAD2 (double, long, long, 21); + + TEST_MASK_GATHER_LOAD3 (double, long, long); + TEST_MASK_GATHER_LOAD3 (float, int, int); + + TEST_MASK_GATHER_LOAD4 (double, long, long, 5); + + TEST_MASK_GATHER_LOAD5 
(double, long, long, 5); + TEST_MASK_GATHER_LOAD5 (float, int, int, 5); + + /* Widened forms. */ + TEST_MASK_GATHER_LOAD1 (double, long, int, 5) + TEST_MASK_GATHER_LOAD1 (double, long, int, 8) + TEST_MASK_GATHER_LOAD1 (double, long, short, 5) + TEST_MASK_GATHER_LOAD1 (double, long, short, 8) + + TEST_MASK_GATHER_LOAD1 (float, int, short, 5) + TEST_MASK_GATHER_LOAD1 (float, int, short, 8) + + TEST_MASK_GATHER_LOAD2 (double, long, int, 5); + TEST_MASK_GATHER_LOAD2 (double, long, int, 8); + TEST_MASK_GATHER_LOAD2 (double, long, int, 21); + + TEST_MASK_GATHER_LOAD4 (double, long, int, 5); + TEST_MASK_GATHER_LOAD4 (float, int, short, 5); + + TEST_MASK_GATHER_LOAD5 (double, long, int, 5); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c new file mode 100644 index 00000000000..51cd4646765 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_MASK_LOAD(TYPE)\ +void maskload_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\ +{\ + for (int i = 0; i < n; i++)\ + a[i] = a[i] < 4 ? b[i] : a[i];\ +}\ + +#define DEF_MASK_STORE(TYPE)\ +void maskstore_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\ +{\ + for (int i = 0; i < n; i++)\ + if (b[i] != 0)\ + a[i] = b[i];\ +}\ + +DEF_MASK_LOAD (int8_t) +DEF_MASK_LOAD (int16_t) +DEF_MASK_LOAD (int32_t) +DEF_MASK_LOAD (int64_t) +DEF_MASK_LOAD (uint8_t) +DEF_MASK_LOAD (uint16_t) +DEF_MASK_LOAD (uint32_t) +DEF_MASK_LOAD (uint64_t) + +DEF_MASK_STORE (int8_t) +DEF_MASK_STORE (int16_t) +DEF_MASK_STORE (int32_t) +DEF_MASK_STORE (int64_t) +DEF_MASK_STORE (uint8_t) +DEF_MASK_STORE (uint16_t) +DEF_MASK_STORE (uint32_t) +DEF_MASK_STORE (uint64_t) + +/* No scalar memory accesses. */ +/* { dg-final { scan-assembler-not {[wx][0-9]+, \[} } } */ + +/* No scalar memory accesses. 
*/ +/* { dg-final { scan-assembler-not {\tand\t} } } */ + +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z} 6 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c new file mode 100644 index 00000000000..22ab0f0e98a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c @@ -0,0 +1,90 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_mask_ldst_1.c" + +#include <stdlib.h> + +#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) + +#define DEF_INIT_VECTOR(TYPE)\ + TYPE a_##TYPE[NUM_ELEMS (TYPE) + 
1];\ + TYPE b_##TYPE[NUM_ELEMS (TYPE) + 1];\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + {\ + a_##TYPE[i] = (i & 0x1) ? 0 : 4;\ + b_##TYPE[i] = (i & 0x1) ? 4 : 0;\ + }\ + a_##TYPE[NUM_ELEMS (TYPE)] = 101;\ + b_##TYPE[NUM_ELEMS (TYPE)] = 4; + +#define TEST_MASK_LOAD(TYPE)\ + maskload_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + if (a_##TYPE[i] != 4)\ + {\ + result++;\ + }\ + if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\ + abort (); + +#define TEST_MASK_STORE(TYPE)\ + maskstore_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + if (a_##TYPE[i] != 4)\ + {\ + result++;\ + }\ + if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\ + abort (); + +int __attribute__((optimize(1))) +main (int argc, char **argv) +{ + int result = 0; + + { + DEF_INIT_VECTOR (int8_t); + DEF_INIT_VECTOR (int16_t); + DEF_INIT_VECTOR (int32_t); + DEF_INIT_VECTOR (int64_t); + DEF_INIT_VECTOR (uint8_t); + DEF_INIT_VECTOR (uint16_t); + DEF_INIT_VECTOR (uint32_t); + DEF_INIT_VECTOR (uint64_t); + + TEST_MASK_LOAD (int8_t); + TEST_MASK_LOAD (int16_t); + TEST_MASK_LOAD (int32_t); + TEST_MASK_LOAD (int64_t); + TEST_MASK_LOAD (uint8_t); + TEST_MASK_LOAD (uint16_t); + TEST_MASK_LOAD (uint32_t); + TEST_MASK_LOAD (uint64_t); + } + + { + DEF_INIT_VECTOR (int8_t); + DEF_INIT_VECTOR (int16_t); + DEF_INIT_VECTOR (int32_t); + DEF_INIT_VECTOR (int64_t); + DEF_INIT_VECTOR (uint8_t); + DEF_INIT_VECTOR (uint16_t); + DEF_INIT_VECTOR (uint32_t); + DEF_INIT_VECTOR (uint64_t); + + TEST_MASK_STORE (int8_t); + TEST_MASK_STORE (int16_t); + TEST_MASK_STORE (int32_t); + TEST_MASK_STORE (int64_t); + TEST_MASK_STORE (uint8_t); + TEST_MASK_STORE (uint16_t); + TEST_MASK_STORE (uint32_t); + TEST_MASK_STORE (uint64_t); + } + + if (result != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c new file mode 100644 index 
00000000000..a7f2995a6cd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1.c @@ -0,0 +1,124 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +void mscatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * STRIDE])\ + dst[i * STRIDE] = src[i];\ +} + +#define MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mscatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * stride])\ + dst[i * stride] = src[i];\ +} + +#define MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mscatter_store3s5##OBJTYPE##STRIDETYPE\ + (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\ + OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\ + MASKTYPE * restrict masks, STRIDETYPE count)\ +{\ + const STRIDETYPE STRIDE = 5;\ + for (STRIDETYPE i=0; i<count; i++)\ + if (masks[i * STRIDE])\ + {\ + dst[0 + (i * STRIDE)] = s1[i];\ + dst[1 + (i * STRIDE)] = s2[i];\ + dst[2 + (i * STRIDE)] = s3[i];\ + dst[3 + (i * STRIDE)] = s4[i];\ + dst[4 + (i * STRIDE)] = s5[i];\ + }\ +} + +#define MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +void mscatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict masks,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + if (masks[i * STRIDE])\ + *dst = *src;\ + dst += STRIDE;\ + src += 1;\ + }\ +} + +#define MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE)\ +void mscatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + MASKTYPE * restrict 
masks,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + if (masks[i * stride])\ + *dst = *src;\ + dst += stride;\ + src += 1;\ + }\ +} + +MASK_SCATTER_STORE1 (double, long, long, 5) +MASK_SCATTER_STORE1 (double, long, long, 8) +MASK_SCATTER_STORE1 (double, long, long, 21) +MASK_SCATTER_STORE1 (double, long, long, 1009) + +MASK_SCATTER_STORE1 (float, int, int, 5) + +MASK_SCATTER_STORE1 (float, int, int, 8) +MASK_SCATTER_STORE1 (float, int, int, 21) +MASK_SCATTER_STORE1 (float, int, int, 1009) + +MASK_SCATTER_STORE2 (double, long, long) +MASK_SCATTER_STORE2 (float, int, int) + +MASK_SCATTER_STORE3 (double, long, long) +MASK_SCATTER_STORE3 (float, int, int) + +MASK_SCATTER_STORE4 (double, long, long, 5) +/* NOTE: We can't vectorize MASK_SCATTER_STORE4 (float, int, int, 3) because we + can't prove that the offsets used for the gather load won't overflow. */ + +MASK_SCATTER_STORE5 (double, long, long) +MASK_SCATTER_STORE5 (float, int, int) + +/* Widened forms. */ +MASK_SCATTER_STORE1 (double, long, int, 5) +MASK_SCATTER_STORE1 (double, long, int, 8) +MASK_SCATTER_STORE1 (double, long, short, 5) +MASK_SCATTER_STORE1 (double, long, short, 8) + +MASK_SCATTER_STORE1 (float, int, short, 5) +MASK_SCATTER_STORE1 (float, int, short, 8) + +MASK_SCATTER_STORE2 (double, long, int) +MASK_SCATTER_STORE2 (float, int, short) + +MASK_SCATTER_STORE4 (double, long, int, 5) +MASK_SCATTER_STORE4 (float, int, short, 5) + +MASK_SCATTER_STORE5 (double, long, int) + +/* Gather loads are for the masks. 
*/ +/* { dg-final { scan-assembler-times "ld1d\\tz\[0-9\]+.d, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 15 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 8 } } */ +/* { dg-final { scan-assembler-times "ld1w\\tz\[0-9\]+.s, p\[0-9\]+/z, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ + +/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c new file mode 100644 index 00000000000..3222d420763 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_scatter_store_1_run.c @@ -0,0 +1,186 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <unistd.h> +#include <stdio.h> + +extern void abort (void); +extern void *memset(void *, int, size_t); + +#include "sve_mask_scatter_store_1.c" + +#define NUM_SRC_ELEMS 13 +#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE) + +#define MASKED_VALUE 3 + +#define TEST_MASK_SCATTER_STORE_COMMON1(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\ + memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ + real_src[0] = 0;\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + {\ + src[i] = i;\ + masks[i * STRIDE] = i & 0x1;\ + }\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); 
i++)\ + dst[i] = MASKED_VALUE;\ + FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, masks, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + if (dst[i * STRIDE] != (masks[i * STRIDE] ? i : MASKED_VALUE))\ + abort ();\ +} + +#define TEST_MASK_SCATTER_STORE_COMMON2(FUN,OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_DST_ELEMS (STRIDE)];\ + memset (masks, 0, (NUM_DST_ELEMS (STRIDE)) * sizeof (MASKTYPE));\ + real_src[0] = 0;\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + {\ + src[i] = i;\ + masks[i * STRIDE] = i & 0x1;\ + }\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (STRIDE); i++)\ + dst[i] = MASKED_VALUE;\ + FUN##OBJTYPE##STRIDETYPE (dst, src, masks, STRIDE, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + if (dst[i * STRIDE] != (masks[i * STRIDE] ? 
i : MASKED_VALUE))\ + abort ();\ +} + +#define TEST_MASK_SCATTER_STORE1(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store1, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_SCATTER_STORE2(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store2, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_SCATTER_STORE3(OBJTYPE,MASKTYPE,STRIDETYPE)\ +{\ + OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\ + __attribute__((aligned (32)));\ + MASKTYPE masks[NUM_DST_ELEMS (5)];\ + memset (masks, 0, (NUM_DST_ELEMS (5)) * sizeof (MASKTYPE));\ + real_src1[0] = real_src2[0] = real_src3[0] = real_src4[0] = real_src5[0] = 0;\ + OBJTYPE *src1 = &real_src1[1];\ + OBJTYPE *src2 = &real_src2[1];\ + OBJTYPE *src3 = &real_src3[1];\ + OBJTYPE *src4 = &real_src4[1];\ + OBJTYPE *src5 = &real_src5[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + {\ + STRIDETYPE base = i * 5;\ + src1[i] = base;\ + src2[i] = base + 1;\ + src3[i] = base + 2;\ + src4[i] = base + 3;\ + src5[i] = base + 4;\ + masks[i * 5] = i & 0x1;\ + }\ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\ + dst[i] = MASKED_VALUE;\ + mscatter_store3s5##OBJTYPE##STRIDETYPE \ + (dst, src1, src2, src3, src4, src5, masks, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + {\ + STRIDETYPE base = i * 5;\ + if (dst[base] != (masks[i * 5] ? base : MASKED_VALUE))\ + abort ();\ + if (dst[base + 1] != (masks[i * 5] ? (base + 1) : MASKED_VALUE))\ + abort ();\ + if (dst[base + 2] != (masks[i * 5] ? 
(base + 2) : MASKED_VALUE))\ + abort ();\ + if (dst[base + 3] != (masks[i * 5] ? (base + 3) : MASKED_VALUE))\ + abort ();\ + if (dst[base + 4] != (masks[i * 5] ? (base + 4) : MASKED_VALUE))\ + abort ();\ + }\ +} + +#define TEST_MASK_SCATTER_STORE4(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_SCATTER_STORE_COMMON1 (mscatter_store4, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +#define TEST_MASK_SCATTER_STORE5(OBJTYPE,MASKTYPE,STRIDETYPE,STRIDE) \ + TEST_MASK_SCATTER_STORE_COMMON2 (mscatter_store5, OBJTYPE, MASKTYPE, \ + STRIDETYPE, STRIDE) + +int __attribute__ ((optimize (1))) +main () +{ + TEST_MASK_SCATTER_STORE1 (double, long, long, 5); + + TEST_MASK_SCATTER_STORE1 (double, long, long, 8); + TEST_MASK_SCATTER_STORE1 (double, long, long, 21); + + TEST_MASK_SCATTER_STORE1 (float, int, int, 5); + TEST_MASK_SCATTER_STORE1 (float, int, int, 8); + TEST_MASK_SCATTER_STORE1 (float, int, int, 21); + + TEST_MASK_SCATTER_STORE2 (double, long, long, 5); + TEST_MASK_SCATTER_STORE2 (double, long, long, 8); + TEST_MASK_SCATTER_STORE2 (double, long, long, 21); + + TEST_MASK_SCATTER_STORE2 (float, int, int, 5); + TEST_MASK_SCATTER_STORE2 (float, int, int, 8); + TEST_MASK_SCATTER_STORE2 (float, int, int, 21); + + TEST_MASK_SCATTER_STORE3 (double, long, long); + TEST_MASK_SCATTER_STORE3 (float, int, int); + + TEST_MASK_SCATTER_STORE4 (double, long, long, 5); + + TEST_MASK_SCATTER_STORE5 (double, long, long, 5); + TEST_MASK_SCATTER_STORE5 (float, int, int, 5); + + /* Widened forms. 
*/ + TEST_MASK_SCATTER_STORE1 (double, long, int, 5) + TEST_MASK_SCATTER_STORE1 (double, long, int, 8) + TEST_MASK_SCATTER_STORE1 (double, long, short, 5) + TEST_MASK_SCATTER_STORE1 (double, long, short, 8) + + TEST_MASK_SCATTER_STORE1 (float, int, short, 5) + TEST_MASK_SCATTER_STORE1 (float, int, short, 8) + + TEST_MASK_SCATTER_STORE2 (double, long, int, 5); + TEST_MASK_SCATTER_STORE2 (double, long, int, 8); + TEST_MASK_SCATTER_STORE2 (double, long, int, 21); + + TEST_MASK_SCATTER_STORE2 (float, int, short, 5); + TEST_MASK_SCATTER_STORE2 (float, int, short, 8); + TEST_MASK_SCATTER_STORE2 (float, int, short, 21); + + TEST_MASK_SCATTER_STORE4 (double, long, int, 5); + TEST_MASK_SCATTER_STORE4 (float, int, short, 5); + + TEST_MASK_SCATTER_STORE5 (double, long, int, 5); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c new file mode 100644 index 00000000000..4a6247db978 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 2] + src[i * 2 + 1]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed 
char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld2b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld2h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tld2w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tld2d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c new file mode 100644 index 00000000000..626b78c29e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_1_run.c @@ -0,0 +1,39 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_load_1.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N]; \ + INTYPE in[N * 2]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 2; ++i) \ + in[i] = i * 9 / 2; \ + NAME##_2 (out, in, mask, N); \ + for (int i = 0; i < N; ++i) \ + { \ + OUTTYPE if_true = in[i * 2] + in[i * 2 + 1]; \ + OUTTYPE if_false = i * 7 / 2; \ + if (out[i] != (mask[i] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c new file mode 100644 index 00000000000..0004e673d49 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = (src[i * 3] \ + + src[i * 3 + 1] \ + + src[i * 3 + 2]); \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld3b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tld3w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tld3d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c new file mode 100644 index 00000000000..86219b4a191 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_2_run.c @@ -0,0 +1,41 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_load_2.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N]; \ + INTYPE in[N * 3]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 3; ++i) \ + in[i] = i * 9 / 2; \ + NAME##_3 (out, in, mask, N); \ + for (int i = 0; i < N; ++i) \ + { \ + OUTTYPE if_true = (in[i * 3] \ + + in[i * 3 + 1] \ + + in[i * 3 + 2]); \ + OUTTYPE if_false = i * 7 / 2; \ + if (out[i] != (mask[i] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c new file mode 100644 index 00000000000..5f784e7dd36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = (src[i * 4] \ + + src[i * 4 + 1] \ + + src[i * 4 + 2] \ + + src[i * 4 + 3]); \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld4b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tld4w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tld4d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c new file mode 100644 index 00000000000..51bd38e2890 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_3_run.c @@ -0,0 +1,42 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_load_3.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N]; \ + INTYPE in[N * 4]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + out[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 4; ++i) \ + in[i] = i * 9 / 2; \ + NAME##_4 (out, in, mask, N); \ + for (int i = 0; i < N; ++i) \ + { \ + OUTTYPE if_true = (in[i * 4] \ + + in[i * 4 + 1] \ + + in[i * 4 + 2] \ + + in[i * 4 + 3]); \ + OUTTYPE if_false = i * 7 / 2; \ + if (out[i] != (mask[i] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c new file mode 100644 index 00000000000..6608558d3ff --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_4.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void \ + NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 3] + src[i * 3 + 2]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld3b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld3h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tld3w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tld3d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c new file mode 100644 index 00000000000..003cf650d7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_5.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void \ + NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 4] + src[i * 4 + 3]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tld4b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tld4h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tld4w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + Out 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tld4d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c new file mode 100644 index 00000000000..a6161f31536 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_6.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void \ + NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 2]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* { dg-final { scan-assembler-not {\tld2b\t} } } */ +/* { dg-final { scan-assembler-not {\tld2h\t} } } */ +/* { dg-final { scan-assembler-not {\tld2w\t} } } */ 
+/* { dg-final { scan-assembler-not {\tld2d\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c new file mode 100644 index 00000000000..75a3e43f267 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_7.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void \ + NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 3] + src[i * 3 + 1]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* { dg-final { scan-assembler-not {\tld3b\t} } } */ +/* { dg-final { scan-assembler-not {\tld3h\t} } } */ +/* { dg-final { scan-assembler-not {\tld3w\t} } } */ +/* { dg-final { scan-assembler-not {\tld3d\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c new file mode 100644 index 00000000000..e87ad0bc074 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_load_8.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void \ + NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + dest[i] = src[i * 4] + src[i * 4 + 2]; \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* { dg-final { scan-assembler-not {\tld4b\t} } } */ +/* { dg-final { scan-assembler-not {\tld4h\t} } } */ +/* { dg-final { scan-assembler-not {\tld4w\t} } } */ +/* { dg-final { scan-assembler-not {\tld4d\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c new file mode 100644 index 00000000000..966968d4b91 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + { \ + dest[i * 2] = src[i]; \ + dest[i * 2 + 1] = src[i]; \ + } \ + } + +#define 
TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tst2b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. */ +/* { dg-final { scan-assembler-times {\tst2h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tst2w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). 
*/ +/* { dg-final { scan-assembler-times {\tst2d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c new file mode 100644 index 00000000000..fd48a4c96f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_1_run.c @@ -0,0 +1,39 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_store_1.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N * 2]; \ + INTYPE in[N]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + in[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 2; ++i) \ + out[i] = i * 9 / 2; \ + NAME##_2 (out, in, mask, N); \ + for (int i = 0; i < N * 2; ++i) \ + { \ + OUTTYPE if_true = in[i / 2]; \ + OUTTYPE if_false = i * 9 / 2; \ + if (out[i] != (mask[i / 2] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c new file mode 100644 index 00000000000..5359c6a457a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_3 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + { \ + dest[i * 3] = src[i]; \ + dest[i * 3 + 1] = src[i]; \ + dest[i * 3 + 2] = src[i]; \ + } \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tst3b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tst3h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tst3w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tst3d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c new file mode 100644 index 00000000000..f8845ebd7ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_2_run.c @@ -0,0 +1,39 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_store_2.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N * 3]; \ + INTYPE in[N]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + in[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 3; ++i) \ + out[i] = i * 9 / 2; \ + NAME##_3 (out, in, mask, N); \ + for (int i = 0; i < N * 3; ++i) \ + { \ + OUTTYPE if_true = in[i / 3]; \ + OUTTYPE if_false = i * 9 / 2; \ + if (out[i] != (mask[i / 3] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c new file mode 100644 index 00000000000..cc614847e7e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3.c @@ -0,0 +1,71 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_4 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + if (cond[i]) \ + { \ + dest[i * 4] = src[i]; \ + dest[i * 4 + 1] = src[i]; \ + dest[i * 4 + 2] = src[i]; \ + dest[i * 4 + 3] = src[i]; \ + } \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 1 1 1 1 + 16 | 1 1 1 1 + 32 | 1 1 1 1 + 64 | 1 1 1 1. */ +/* { dg-final { scan-assembler-times {\tst4b\t.z[0-9]} 16 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 2 2 2 2 + 16 | 2 1 1 1 + 32 | 2 1 1 1 + 64 | 2 1 1 1. 
*/ +/* { dg-final { scan-assembler-times {\tst4h\t.z[0-9]} 23 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 4 4 4 4 + 16 | 4 2 2 2 + 32 | 4 2 1 1 x2 (for float) + 64 | 4 2 1 1. */ +/* { dg-final { scan-assembler-times {\tst4w\t.z[0-9]} 50 } } */ + +/* Mask | 8 16 32 64 + -------+------------ + In 8 | 8 8 8 8 + 16 | 8 4 4 4 + 32 | 8 4 2 2 + 64 | 8 4 2 1 x2 (for double). */ +/* { dg-final { scan-assembler-times {\tst4d\t.z[0-9]} 98 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c new file mode 100644 index 00000000000..f845818fa4d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_3_run.c @@ -0,0 +1,39 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-dce -ffast-math -march=armv8-a+sve" } */ + +#include "sve_mask_struct_store_3.c" + +#define N 100 + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + { \ + OUTTYPE out[N * 4]; \ + INTYPE in[N]; \ + MASKTYPE mask[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + in[i] = i * 7 / 2; \ + mask[i] = i % 5 <= i % 3; \ + } \ + for (int i = 0; i < N * 4; ++i) \ + out[i] = i * 9 / 2; \ + NAME##_4 (out, in, mask, N); \ + for (int i = 0; i < N * 4; ++i) \ + { \ + OUTTYPE if_true = in[i / 4]; \ + OUTTYPE if_false = i * 9 / 2; \ + if (out[i] != (mask[i / 4] ? 
if_true : if_false)) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c new file mode 100644 index 00000000000..ac2df82c539 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_struct_store_4.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \ + void __attribute__((weak)) \ + NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \ + MASKTYPE *__restrict cond, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + if (cond[i] < 8) \ + dest[i * 2] = src[i]; \ + if (cond[i] > 2) \ + dest[i * 2 + 1] = src[i]; \ + } \ + } + +#define TEST2(NAME, OUTTYPE, INTYPE) \ + TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, signed char) \ + TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, unsigned short) \ + TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \ + TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double) + +#define TEST1(NAME, OUTTYPE) \ + TEST2 (NAME##_i8, OUTTYPE, signed char) \ + TEST2 (NAME##_i16, OUTTYPE, unsigned short) \ + TEST2 (NAME##_i32, OUTTYPE, int) \ + TEST2 (NAME##_i64, OUTTYPE, unsigned long) + +#define TEST(NAME) \ + TEST1 (NAME##_i8, signed char) \ + TEST1 (NAME##_i16, unsigned short) \ + TEST1 (NAME##_i32, int) \ + TEST1 (NAME##_i64, unsigned long) \ + TEST2 (NAME##_f32_f32, float, float) \ + TEST2 (NAME##_f64_f64, double, double) + +TEST (test) + +/* { dg-final { scan-assembler-not {\tst2b\t.z[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tst2h\t.z[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tst2w\t.z[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tst2d\t.z[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.c new file mode 100644 index 00000000000..733ffd1b765 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) + +#define DEF_MAXMIN(TYPE, NAME, CMP_OP) \ +void __attribute__ ((noinline, noclone)) \ +fun_##NAME##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ + TYPE *restrict b) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + r[i] = a[i] CMP_OP b[i] ? a[i] : b[i]; \ +} + +#define TEST_ALL(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_ALL (DEF_MAXMIN) + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c new file mode 100644 index 00000000000..d3130bff8fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include "sve_maxmin_1.c" + +#define TEST_LOOP(TYPE, NAME, CMP_OP) \ + { \ + TYPE a[NUM_ELEMS (TYPE)]; \ + TYPE b[NUM_ELEMS (TYPE)]; \ + TYPE r[NUM_ELEMS (TYPE)]; \ + for (int i = 0; i < NUM_ELEMS (TYPE); 
i++) \ + { \ + a[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ + b[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1); \ + asm volatile ("" ::: "memory"); \ + } \ + fun_##NAME##_##TYPE (r, a, b); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (r[i] != (a[i] CMP_OP b[i] ? a[i] : b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.c new file mode 100644 index 00000000000..27561d19694 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <math.h> + +#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) + +#define DEF_MAXMIN(TYPE, FUN) \ +void __attribute__ ((noinline, noclone)) \ +test_##FUN##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ + TYPE *restrict b) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + r[i] = FUN (a[i], b[i]); \ +} + +#define TEST_ALL(T) \ + T (float, fmaxf) \ + T (double, fmax) \ + \ + T (float, fminf) \ + T (double, fmin) + +TEST_ALL (DEF_MAXMIN) + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c new file mode 100644 index 00000000000..2b869c62a5d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_maxmin_strict_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + 
+#include "sve_maxmin_strict_1.c" + +#define TEST_LOOP(TYPE, FUN) \ + { \ + TYPE a[NUM_ELEMS (TYPE)]; \ + TYPE b[NUM_ELEMS (TYPE)]; \ + TYPE r[NUM_ELEMS (TYPE)]; \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + { \ + a[i] = ((i * 2) % 3) * (i & 1 ? 1 : -1); \ + b[i] = (1 + (i % 4)) * (i & 1 ? -1 : 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##FUN##_##TYPE (r, a, b); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (r[i] != FUN (a[i], b[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_miniloop_1.c b/gcc/testsuite/gcc.target/aarch64/sve_miniloop_1.c new file mode 100644 index 00000000000..562607271e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_miniloop_1.c @@ -0,0 +1,23 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +void loop (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c, + int * __restrict__ d, int * __restrict__ e, int * __restrict__ f, + int * __restrict__ g, int * __restrict__ h) +{ + int i = 0; + for (i = 0; i < 3; i++) + { + a[i] += i; + b[i] += i; + c[i] += i; + d[i] += i; + e[i] += i; + f[i] += a[i] + 7; + g[i] += b[i] - 3; + h[i] += c[i] + 3; + } +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, } 8 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, } 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_miniloop_2.c b/gcc/testsuite/gcc.target/aarch64/sve_miniloop_2.c new file mode 100644 index 00000000000..0e86004e21e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_miniloop_2.c @@ -0,0 +1,7 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps -msve-vector-bits=256" } */ + +#include "sve_miniloop_1.c" + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, } 8 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, } 8 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c new file mode 100644 index 00000000000..a4d705e38ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mla_1.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = (src1 * src2) + dst; \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) + +/* { dg-final { scan-assembler-times {\tmla\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmla\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c new file mode 100644 index 00000000000..b7cc1dba087 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mls_1.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void 
vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = dst - (src1 * src2); \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) + +/* { dg-final { scan-assembler-times {\tmls\tz0\.b, p[0-7]/m, z2\.b, z4\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.h, p[0-7]/m, z2\.h, z4\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.s, p[0-7]/m, z2\.s, z4\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmls\tz0\.d, p[0-7]/m, z2\.d, z4\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c new file mode 100644 index 00000000000..a38375af017 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mov_rr_1.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +void sve_copy_rr (void) +{ + typedef int v8si __attribute__((vector_size(32))); + register v8si x asm ("z1"); + register v8si y asm ("z2"); + asm volatile ("#foo" : "=w" (x)); + y = x; + asm volatile ("#foo" :: "w" (y)); +} + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c new file mode 100644 index 00000000000..fc05837a920 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_msb_1.c @@ -0,0 +1,34 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di 
__attribute__((vector_size(32))); + +#define DO_OP(TYPE) \ +void vmla_##TYPE (TYPE *x, TYPE y, TYPE z) \ +{ \ + register TYPE dst asm("z0"); \ + register TYPE src1 asm("z2"); \ + register TYPE src2 asm("z4"); \ + dst = *x; \ + src1 = y; \ + src2 = z; \ + asm volatile ("" :: "w" (dst), "w" (src1), "w" (src2)); \ + dst = src2 - (dst * src1); \ + asm volatile ("" :: "w" (dst)); \ + *x = dst; \ +} + +DO_OP (v32qi) +DO_OP (v16hi) +DO_OP (v8si) +DO_OP (v4di) + +/* { dg-final { scan-assembler-times {\tmsb\tz0\.b, p[0-7]/m, z2\.b, z4\.b} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.h, p[0-7]/m, z2\.h, z4\.h} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.s, p[0-7]/m, z2\.s, z4\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tmsb\tz0\.d, p[0-7]/m, z2\.d, z4\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c new file mode 100644 index 00000000000..2b1cd4a7a93 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mul_1.c @@ -0,0 +1,64 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_REGREG_OPS(TYPE, OP, NAME) \ +void varith_##TYPE##_##NAME (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP src[i]; \ +} + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, OP, NAME) \ +void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] OP VALUE; \ +} + +#define DO_ARITH_OPS(TYPE, OP, NAME) \ + DO_REGREG_OPS (TYPE, OP, NAME); \ + DO_IMMEDIATE_OPS (0, TYPE, OP, NAME ## 0); \ + DO_IMMEDIATE_OPS (86, TYPE, OP, NAME ## 86); \ + DO_IMMEDIATE_OPS (109, TYPE, OP, NAME ## 109); \ + DO_IMMEDIATE_OPS (141, TYPE, OP, NAME ## 141); \ + DO_IMMEDIATE_OPS (-1, TYPE, OP, NAME ## minus1); \ + DO_IMMEDIATE_OPS (-110, TYPE, OP, NAME ## minus110); \ + DO_IMMEDIATE_OPS (-141, TYPE, OP, NAME ## minus141); + +DO_ARITH_OPS (int8_t, *, mul) 
+DO_ARITH_OPS (int16_t, *, mul) +DO_ARITH_OPS (int32_t, *, mul) +DO_ARITH_OPS (int64_t, *, mul) + +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */ + +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */ + +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */ + +/* { dg-final { scan-assembler-times 
{\tmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tmul\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c b/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c new file mode 100644 index 00000000000..b463c2c0580 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_neg_1.c @@ -0,0 +1,21 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_OPS(TYPE) \ +void vneg_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = -src[i]; \ +} + +DO_OPS (int8_t) +DO_OPS (int16_t) +DO_OPS (int32_t) +DO_OPS (int64_t) + +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c new file mode 100644 index 00000000000..3871451bc1d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1.c @@ -0,0 +1,33 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_VNLOGICAL(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +vnlogical_not_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = ~dst[i]; \ +} \ + \ +void 
__attribute__ ((noinline, noclone)) \ +vnlogical_bic_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; i++) \ + dst[i] = dst[i] & ~src[i]; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) + +TEST_ALL (DO_VNLOGICAL) + +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c new file mode 100644 index 00000000000..905d44b8265 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_nlogical_1_run.c @@ -0,0 +1,37 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_nlogical_1.c" + +#define N 128 + +#define TEST_VNLOGICAL(TYPE) \ + { \ + TYPE dst[N], src[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + dst[i] = i ^ 42; \ + asm volatile ("" ::: "memory"); \ + } \ + vnlogical_not_##TYPE (dst, N); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != (TYPE) ~(i ^ 42)) \ + __builtin_abort (); \ + for (int i = 0; i < N; ++i) \ + { \ + dst[i] = i ^ 42; \ + src[i] = i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vnlogical_bic_##TYPE (dst, src, N); \ + for (int i = 0; i < N; ++i) \ + if (dst[i] != (TYPE) ((i ^ 42) & ~(i % 5))) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (TEST_VNLOGICAL) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c new file mode 100644 index 00000000000..8f50308ebd5 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_nopeel_1.c @@ -0,0 +1,36 @@ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define TEST(NAME, TYPE, ITYPE) \ + void \ + NAME##1 (TYPE *x, ITYPE n) \ + { \ + for (ITYPE i = 0; i < n; ++i) \ + x[i] += 1; \ + } \ + TYPE NAME##_array[1024]; \ + void \ + NAME##2 (void) \ + { \ + for (ITYPE i = 1; i < 200; ++i) \ + NAME##_array[i] += 1; \ + } + +TEST (sc, signed char, unsigned char) +TEST (uc, unsigned char, unsigned char) +TEST (ss, signed short, unsigned short) +TEST (us, unsigned short, signed short) +TEST (si, signed int, signed int) +TEST (ui, unsigned int, unsigned int) +TEST (sl, signed long, unsigned long) +TEST (ul, unsigned long, signed long) +TEST (f, float, int) +TEST (d, double, long) + +/* No scalar memory accesses. */ +/* { dg-final { scan-assembler-not {[wx][0-9]*, \[} } } */ +/* 2 for each NAME##1 test, one in the header and one in the main loop + and 1 for each NAME##2 test, in the main loop only. 
*/ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 9 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 9 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c new file mode 100644 index 00000000000..723b4e3433b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define PACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +pack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ +} + +#define TEST_ALL(T) \ + T (int32_t, int64_t) \ + T (int16_t, int32_t) \ + T (int8_t, int16_t) \ + T (uint32_t, uint64_t) \ + T (uint16_t, uint32_t) \ + T (uint8_t, uint16_t) + +TEST_ALL (PACK) + +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c new file mode 100644 index 00000000000..cb7876cb135 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_pack_1.c" + +#define ARRAY_SIZE 57 + +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + pack_##TYPED##_##TYPES (arrayd, arrays, 
ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c new file mode 100644 index 00000000000..a99d227e4c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +pack_int_double_plus_3 (int32_t *d, double *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = s[i] + 3; +} + +/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c new file mode 100644 index 00000000000..2a45bb5b1e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_signed_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_pack_fcvt_signed_1.c" + +#define ARRAY_SIZE 107 + +#define VAL1 ((i * 345.434) - (19 * 345.434)) + +int __attribute__ ((optimize (1))) +main (void) +{ + static int32_t array_dest[ARRAY_SIZE]; + double array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + pack_int_double_plus_3 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (int32_t) VAL1 + 3) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c new file mode 100644 index 00000000000..a039d6fdd66 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +pack_int_double_plus_7 (uint32_t *d, double *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = s[i] + 7; +} + +/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c new file mode 100644 index 00000000000..8a1e72485ad --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_fcvt_unsigned_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_pack_fcvt_unsigned_1.c" + +#define ARRAY_SIZE 157 + +#define VAL1 (i * 9584.3432) + +int __attribute__ ((optimize (1))) +main (void) +{ + static uint32_t array_dest[ARRAY_SIZE]; + double array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + pack_int_double_plus_7 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (uint32_t) VAL1 + 7) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c new file mode 100644 index 00000000000..746154e530d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void __attribute__ ((noinline, 
noclone)) +pack_float_plus_1point1 (float *d, double *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = s[i] + 1.1; +} + +/* { dg-final { scan-assembler-times {\tfcvt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c new file mode 100644 index 00000000000..91e8a699f0b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_pack_float_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_pack_float_1.c" + +#define ARRAY_SIZE 107 + +#define VAL1 ((i * 886.556) - (43 * 886.556)) + +int __attribute__ ((optimize (1))) +main (void) +{ + float array_dest[ARRAY_SIZE]; + double array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + pack_float_plus_1point1 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (float) (VAL1 + 1.1)) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c new file mode 100644 index 00000000000..a39f8241f46 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* Pick an arbitrary target for which unaligned accesses are more + expensive. */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" } */ + +#define N 512 +#define START 1 +#define END 505 + +int x[N] __attribute__((aligned(32))); + +void __attribute__((weak)) +foo (void) +{ + unsigned int v = 0; + for (unsigned int i = START; i < END; ++i) + { + x[i] = v; + v += 5; + } +} + +/* We should operate on aligned vectors. 
*/ +/* { dg-final { scan-assembler {\tadrp\tx[0-9]+, x\n} } } */ +/* We should use an induction that starts at -5, with only the last + 7 elements of the first iteration being active. */ +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #-5, #5\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c new file mode 100644 index 00000000000..1ebaeea2bb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_1_run.c @@ -0,0 +1,20 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */ + +#include "sve_peel_ind_1.c" + +volatile int y; + +int +main (void) +{ + foo (); + for (int i = 0; i < N; ++i) + { + if (x[i] != (i < START || i >= END ? 0 : (i - START) * 5)) + __builtin_abort (); + y++; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c new file mode 100644 index 00000000000..9ef8c7f85e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* Pick an arbitrary target for which unaligned accesses are more + expensive. */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" } */ + +#define N 512 +#define START 7 +#define END 22 + +int x[N] __attribute__((aligned(32))); + +void __attribute__((weak)) +foo (void) +{ + for (unsigned int i = START; i < END; ++i) + x[i] = i; +} + +/* We should operate on aligned vectors. */ +/* { dg-final { scan-assembler {\tadrp\tx[0-9]+, x\n} } } */ +/* We should unroll the loop three times. 
*/ +/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c new file mode 100644 index 00000000000..b3e56bbbb7c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_2_run.c @@ -0,0 +1,20 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */ + +#include "sve_peel_ind_2.c" + +volatile int y; + +int +main (void) +{ + foo (); + for (int i = 0; i < N; ++i) + { + if (x[i] != (i < START || i >= END ? 0 : i)) + __builtin_abort (); + y++; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c new file mode 100644 index 00000000000..97a29f18361 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* Pick an arbitrary target for which unaligned accesses are more + expensive. */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" } */ + +#define N 32 +#define MAX_START 8 +#define COUNT 16 + +int x[MAX_START][N] __attribute__((aligned(32))); + +void __attribute__((weak)) +foo (int start) +{ + for (int i = start; i < start + COUNT; ++i) + x[start][i] = i; +} + +/* We should operate on aligned vectors. 
*/ +/* { dg-final { scan-assembler {\tadrp\tx[0-9]+, x\n} } } */ +/* { dg-final { scan-assembler {\tubfx\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c new file mode 100644 index 00000000000..9851c1cce64 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_3_run.c @@ -0,0 +1,23 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 -march=armv8-a+sve -mtune=thunderx" } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx" { target aarch64_sve256_hw } } */ + +#include "sve_peel_ind_3.c" + +volatile int y; + +int +main (void) +{ + for (int start = 0; start < MAX_START; ++start) + { + foo (start); + for (int i = 0; i < N; ++i) + { + if (x[start][i] != (i < start || i >= start + COUNT ? 0 : i)) + __builtin_abort (); + y++; + } + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c new file mode 100644 index 00000000000..e5c55877341 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* Pick an arbitrary target for which unaligned accesses are more + expensive. */ +/* { dg-options "-Ofast -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" } */ + +#define START 1 +#define END 505 + +void __attribute__((weak)) +foo (double *x) +{ + double v = 10.0; + for (unsigned int i = START; i < END; ++i) + { + x[i] = v; + v += 5.0; + } +} + +/* We should operate on aligned vectors. 
*/ +/* { dg-final { scan-assembler {\tubfx\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c new file mode 100644 index 00000000000..60be4a038de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_peel_ind_4_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-Ofast -march=armv8-a+sve -mtune=thunderx -fno-vect-cost-model" } */ +/* { dg-options "-Ofast -march=armv8-a+sve -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" { target aarch64_sve256_hw } } */ + +#include "sve_peel_ind_4.c" + +volatile int y; + +int +main (void) +{ + double x[END + 1]; + for (int i = 0; i < END + 1; ++i) + x[i] = i; + foo (x); + for (int i = 0; i < END + 1; ++i) + { + double expected; + if (i < START || i >= END) + expected = i; + else + expected = 10 + (i - START) * 5; + if (x[i] != expected) + __builtin_abort (); + y++; + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c new file mode 100644 index 00000000000..c3bb2756b2a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1.c @@ -0,0 +1,22 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +popcount_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_popcount (src[i]); +} + +void __attribute__ ((noinline, noclone)) +popcount_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) +{ + for (int i = 0; i < size; ++i) + dst[i] = __builtin_popcountl (src[i]); +} + +/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c new file mode 100644 index 00000000000..6be828fa81a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_popcount_1_run.c @@ -0,0 +1,50 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_popcount_1.c" + +extern void abort (void) __attribute__ ((noreturn)); + +unsigned int data[] = { + 0x11111100, 6, + 0xe0e0f0f0, 14, + 0x9900aab3, 13, + 0x00040003, 3, + 0x000e000c, 5, + 0x22227777, 16, + 0x12341234, 10, + 0x0, 0 +}; + +int __attribute__ ((optimize (1))) +main (void) +{ + unsigned int count = sizeof (data) / sizeof (data[0]) / 2; + + uint32_t in32[count]; + unsigned int out32[count]; + for (unsigned int i = 0; i < count; ++i) + { + in32[i] = data[i * 2]; + asm volatile ("" ::: "memory"); + } + popcount_32 (out32, in32, count); + for (unsigned int i = 0; i < count; ++i) + if (out32[i] != data[i * 2 + 1]) + abort (); + + count /= 2; + uint64_t in64[count]; + unsigned int out64[count]; + for (unsigned int i = 0; i < count; ++i) + { + in64[i] = ((uint64_t) data[i * 4] << 32) | data[i * 4 + 2]; + asm volatile ("" ::: "memory"); + } + popcount_64 (out64, in64, count); + for (unsigned int i = 0; i < count; ++i) + if (out64[i] != data[i * 4 + 1] + data[i * 4 + 3]) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c new file mode 100644 index 00000000000..4c26e78fae8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c @@ -0,0 +1,205 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_REDUC_PLUS(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 0; \ + for (int i = 0; i < n; ++i) \ + r += a[i]; \ + return r; 
\ +} + +#define TEST_PLUS(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_PLUS (DEF_REDUC_PLUS) + +#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 13; \ + for (int i = 0; i < n; ++i) \ + r = a[i] CMP_OP r ? a[i] : r; \ + return r; \ +} + +#define TEST_MAXMIN(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_MAXMIN (DEF_REDUC_MAXMIN) + +#define DEF_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ +TYPE __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE *a, int n) \ +{ \ + TYPE r = 13; \ + for (int i = 0; i < n; ++i) \ + r BIT_OP a[i]; \ + return r; \ +} + +#define TEST_BITWISE(T) \ + T (int8_t, and, &=) \ + T (int16_t, and, &=) \ + T (int32_t, and, &=) \ + T (int64_t, and, &=) \ + T (uint8_t, and, &=) \ + T (uint16_t, and, &=) \ + T (uint32_t, and, &=) \ + T (uint64_t, and, &=) \ + \ + T (int8_t, ior, |=) \ + T (int16_t, ior, |=) \ + T (int32_t, ior, |=) \ + T (int64_t, ior, |=) \ + T (uint8_t, ior, |=) \ + T (uint16_t, ior, |=) \ + T (uint32_t, ior, |=) \ + T (uint64_t, ior, |=) \ + \ + T (int8_t, xor, ^=) \ + T (int16_t, xor, ^=) \ + T (int32_t, xor, ^=) \ + T (int64_t, xor, ^=) \ + T (uint8_t, xor, ^=) \ + T (uint16_t, xor, ^=) \ + T (uint32_t, xor, ^=) \ + T (uint64_t, xor, ^=) + +TEST_BITWISE (DEF_REDUC_BITWISE) + +/* { dg-final { 
scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c new file mode 100644 index 00000000000..9f4afbcf3a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1_run.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include "sve_reduc_1.c" + +#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) + +#define INIT_VECTOR(TYPE) \ + TYPE a[NUM_ELEMS (TYPE) + 1]; \ + for (int i = 0; i < NUM_ELEMS (TYPE) + 1; i++) \ + { \ + a[i] = ((i * 2) * (i & 1 ? 1 : -1) | 3); \ + asm volatile ("" ::: "memory"); \ + } + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_plus_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 0; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 += a[i]; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 13; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 = a[i] CMP_OP r2 ? 
a[i] : r2; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ + { \ + INIT_VECTOR (TYPE); \ + TYPE r1 = reduc_##NAME##_##TYPE (a, NUM_ELEMS (TYPE)); \ + volatile TYPE r2 = 13; \ + for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ + r2 BIT_OP a[i]; \ + if (r1 != r2) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_PLUS (TEST_REDUC_PLUS) + TEST_MAXMIN (TEST_REDUC_MAXMIN) + TEST_BITWISE (TEST_REDUC_BITWISE) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.c new file mode 100644 index 00000000000..669306549d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2.c @@ -0,0 +1,164 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (1024 / sizeof (TYPE)) + +#define DEF_REDUC_PLUS(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = 0; \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + r[i] += a[i][j]; \ + } \ +} + +#define TEST_PLUS(T) \ + T (int8_t) \ + T (int16_t) \ + T (int32_t) \ + T (int64_t) \ + T (uint8_t) \ + T (uint16_t) \ + T (uint32_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_PLUS (DEF_REDUC_PLUS) + +#define DEF_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ +void __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = a[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + r[i] = a[i][j] CMP_OP r[i] ? 
a[i][j] : r[i]; \ +  } \ +} + +#define TEST_MAXMIN(T) \ + T (int8_t, max, >) \ + T (int16_t, max, >) \ + T (int32_t, max, >) \ + T (int64_t, max, >) \ + T (uint8_t, max, >) \ + T (uint16_t, max, >) \ + T (uint32_t, max, >) \ + T (uint64_t, max, >) \ + T (_Float16, max, >) \ + T (float, max, >) \ + T (double, max, >) \ + \ + T (int8_t, min, <) \ + T (int16_t, min, <) \ + T (int32_t, min, <) \ + T (int64_t, min, <) \ + T (uint8_t, min, <) \ + T (uint16_t, min, <) \ + T (uint32_t, min, <) \ + T (uint64_t, min, <) \ + T (_Float16, min, <) \ + T (float, min, <) \ + T (double, min, <) + +TEST_MAXMIN (DEF_REDUC_MAXMIN) + +#define DEF_REDUC_BITWISE(TYPE,NAME,BIT_OP) \ +void __attribute__ ((noinline, noclone)) \ +reduc_##NAME##_##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \ + TYPE *restrict r, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + r[i] = a[i][0]; \ + for (int j = 0; j < NUM_ELEMS(TYPE); j++) \ + r[i] BIT_OP a[i][j]; \ + } \ +} + +#define TEST_BITWISE(T) \ + T (int8_t, and, &=) \ + T (int16_t, and, &=) \ + T (int32_t, and, &=) \ + T (int64_t, and, &=) \ + T (uint8_t, and, &=) \ + T (uint16_t, and, &=) \ + T (uint32_t, and, &=) \ + T (uint64_t, and, &=) \ + \ + T (int8_t, ior, |=) \ + T (int16_t, ior, |=) \ + T (int32_t, ior, |=) \ + T (int64_t, ior, |=) \ + T (uint8_t, ior, |=) \ + T (uint16_t, ior, |=) \ + T (uint32_t, ior, |=) \ + T (uint64_t, ior, |=) \ + \ + T (int8_t, xor, ^=) \ + T (int16_t, xor, ^=) \ + T (int32_t, xor, ^=) \ + T (int64_t, xor, ^=) \ + T (uint8_t, xor, ^=) \ + T (uint16_t, xor, ^=) \ + T (uint32_t, xor, ^=) \ + T (uint64_t, xor, ^=) + +TEST_BITWISE (DEF_REDUC_BITWISE) + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final {
scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsmaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumaxv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tsminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuminv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\th[0-9]+, p[0-7], 
z[0-9]+\.h\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c new file mode 100644 index 00000000000..041db66c8cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_2_run.c @@ -0,0 +1,79 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include "sve_reduc_2.c" + +#define NROWS 53 + +/* -ffast-math fuzz for PLUS. 
*/ +#define CMP__Float16(X, Y) ((X) >= (Y) * 0.875 && (X) <= (Y) * 1.125) +#define CMP_float(X, Y) ((X) == (Y)) +#define CMP_double(X, Y) ((X) == (Y)) +#define CMP_int8_t(X, Y) ((X) == (Y)) +#define CMP_int16_t(X, Y) ((X) == (Y)) +#define CMP_int32_t(X, Y) ((X) == (Y)) +#define CMP_int64_t(X, Y) ((X) == (Y)) +#define CMP_uint8_t(X, Y) ((X) == (Y)) +#define CMP_uint16_t(X, Y) ((X) == (Y)) +#define CMP_uint32_t(X, Y) ((X) == (Y)) +#define CMP_uint64_t(X, Y) ((X) == (Y)) + +#define INIT_MATRIX(TYPE) \ + TYPE mat[NROWS][NUM_ELEMS (TYPE)]; \ + TYPE r[NROWS]; \ + for (int i = 0; i < NROWS; i++) \ + for (int j = 0; j < NUM_ELEMS (TYPE); j++) \ + { \ + mat[i][j] = i + (j * 2) * (j & 1 ? 1 : -1); \ + asm volatile ("" ::: "memory"); \ + } + +#define TEST_REDUC_PLUS(TYPE) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_plus_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = 0; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 += mat[i][j]; \ + if (!CMP_##TYPE (r[i], r2)) \ + __builtin_abort (); \ + } \ + } + +#define TEST_REDUC_MAXMIN(TYPE, NAME, CMP_OP) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_##NAME##_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = mat[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 = mat[i][j] CMP_OP r2 ? 
mat[i][j] : r2; \ + if (r[i] != r2) \ + __builtin_abort (); \ + } \ + } + +#define TEST_REDUC_BITWISE(TYPE, NAME, BIT_OP) \ + { \ + INIT_MATRIX (TYPE); \ + reduc_##NAME##_##TYPE (mat, r, NROWS); \ + for (int i = 0; i < NROWS; i++) \ + { \ + volatile TYPE r2 = mat[i][0]; \ + for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \ + r2 BIT_OP mat[i][j]; \ + if (r[i] != r2) \ + __builtin_abort (); \ + } \ + } + +int main () +{ + TEST_PLUS (TEST_REDUC_PLUS) + TEST_MAXMIN (TEST_REDUC_MAXMIN) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c new file mode 100644 index 00000000000..7daf3ae130e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_3.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (32 / sizeof (TYPE)) + +#define REDUC_PTR(DSTTYPE, SRCTYPE) \ +void reduc_ptr_##DSTTYPE##_##SRCTYPE (DSTTYPE *restrict sum, \ + SRCTYPE *restrict array, \ + int count) \ +{ \ + *sum = 0; \ + for (int i = 0; i < count; ++i) \ + *sum += array[i]; \ +} + +REDUC_PTR (int8_t, int8_t) +REDUC_PTR (int16_t, int16_t) + +REDUC_PTR (int32_t, int32_t) +REDUC_PTR (int64_t, int64_t) + +REDUC_PTR (_Float16, _Float16) +REDUC_PTR (float, float) +REDUC_PTR (double, double) + +/* Widening reductions. 
*/ +REDUC_PTR (int32_t, int8_t) +REDUC_PTR (int32_t, int16_t) + +REDUC_PTR (int64_t, int8_t) +REDUC_PTR (int64_t, int16_t) +REDUC_PTR (int64_t, int32_t) + +REDUC_PTR (float, _Float16) +REDUC_PTR (double, float) + +/* Float<>Int conversions */ +REDUC_PTR (_Float16, int16_t) +REDUC_PTR (float, int32_t) +REDUC_PTR (double, int64_t) + +REDUC_PTR (int16_t, _Float16) +REDUC_PTR (int32_t, float) +REDUC_PTR (int64_t, double) + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c new file mode 100644 index 00000000000..9e997adedca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +double +f (double *restrict a, double *restrict b, int *lookup) +{ + double res = 0.0; + for (int i = 0; i < 512; ++i) + res += a[lookup[i]] * b[i]; + return res; +} + +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+.d, p[0-7]/m, } 2 } } */ +/* Check that the vector instructions are the only instructions. 
*/ +/* { dg-final { scan-assembler-times {\tfmla\t} 2 } } */ +/* { dg-final { scan-assembler-not {\tfadd\t} } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td0,} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c new file mode 100644 index 00000000000..59363aac56b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define REDUC(TYPE) \ + TYPE reduc_##TYPE (TYPE *x, int count) \ + { \ + TYPE sum = 0; \ + for (int i = 0; i < count; ++i) \ + sum -= x[i]; \ + return sum; \ + } + +REDUC (int8_t) +REDUC (uint8_t) +REDUC (int16_t) +REDUC (uint16_t) +REDUC (int32_t) +REDUC (uint32_t) +REDUC (int64_t) +REDUC (uint64_t) +REDUC (float) +REDUC (double) + +/* XFAILed until we support sub-int reductions for signed types. */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */ + +/* XFAILed until we support sub-int reductions for signed types. 
*/ +/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c new file mode 100644 index 00000000000..e1f72941de4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#define REDUC(TYPE) \ + TYPE reduc_##TYPE (TYPE *x, TYPE *y, int count) \ + { \ + TYPE sum = 0; \ + for (int i = 0; i < count; ++i) \ + sum += x[i] * y[i]; \ + return sum; \ + } + +REDUC (float) +REDUC (double) + +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c new file mode 100644 index 00000000000..851f52d2cbd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#define REDUC(TYPE) \ + TYPE reduc_##TYPE (TYPE *x, TYPE *y, int count) \ + { \ + TYPE sum = 0; \ + for (int i = 0; i < count; ++i) \ + sum -= x[i] * y[i]; \ + return sum; \ + } + +REDUC (float) +REDUC (double) + +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C new file mode 100644 index 00000000000..53e10bcea01 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1.C @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <math.h> + +#define NUM_ELEMS(TYPE) 
(int)(5 * (256 / sizeof (TYPE)) + 3) + +#define DEF_REDUC_PLUS(TYPE)\ +TYPE reduc_plus_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b)\ +{\ + TYPE r = 0, q = 3;\ + for (int i = 0; i < NUM_ELEMS(TYPE); i++)\ + {\ + r += a[i];\ + q -= b[i];\ + }\ + return r * q;\ +}\ + +DEF_REDUC_PLUS (float) +DEF_REDUC_PLUS (double) + +#define DEF_REDUC_MAXMIN(TYPE,FUN)\ +TYPE reduc_##FUN (TYPE *__restrict__ a, TYPE *__restrict__ b)\ +{\ + TYPE r = a[0], q = b[0];\ + for (int i = 0; i < NUM_ELEMS(TYPE); i++)\ + {\ + r = FUN (a[i], r);\ + q = FUN (b[i], q);\ + }\ + return r * q;\ +}\ + +DEF_REDUC_MAXMIN (float, fmaxf) +DEF_REDUC_MAXMIN (double, fmax) +DEF_REDUC_MAXMIN (float, fminf) +DEF_REDUC_MAXMIN (double, fmin) + + +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 { xfail *-*-* } } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C new file mode 100644 index 00000000000..769d25165ea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_1_run.C @@ -0,0 +1,47 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_reduc_strict_1.C" +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#define DEF_INIT_VECTOR(TYPE)\ + TYPE a_##TYPE[NUM_ELEMS (TYPE)];\ + TYPE b_##TYPE[NUM_ELEMS (TYPE)];\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + {\ + 
a_##TYPE[i] = (i * 2) * (i & 1 ? 1 : -1);\ + b_##TYPE[i] = (i * 3) * (i & 1 ? 1 : -1);\ + } + +#define TEST_REDUC_PLUS(RES,TYPE) (RES) += reduc_plus_##TYPE (a_##TYPE, b_##TYPE); +#define TEST_REDUC_MAX(RES,TYPE) (RES) += reduc_fmax (a_##TYPE, b_##TYPE); +#define TEST_REDUC_MAXF(RES,TYPE) (RES) += reduc_fmaxf (a_##TYPE, b_##TYPE); +#define TEST_REDUC_MIN(RES,TYPE) (RES) += reduc_fmin (a_##TYPE, b_##TYPE); +#define TEST_REDUC_MINF(RES,TYPE) (RES) += reduc_fminf (a_##TYPE, b_##TYPE); + +int main () +{ + double result = 0.0; + DEF_INIT_VECTOR (float) + DEF_INIT_VECTOR (double) + + TEST_REDUC_PLUS (result, float) + TEST_REDUC_PLUS (result, double) + + TEST_REDUC_MINF (result, float) + TEST_REDUC_MIN (result, double) + + TEST_REDUC_MAXF (result, float) + TEST_REDUC_MAX (result, double) + + if (result != double (1356996)) + { + fprintf (stderr, "result = %1.16lf\n", result); + abort (); + } + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C new file mode 100644 index 00000000000..542918abeb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2.C @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* FIXME: With -O3 we don't generate reductions as the compiler unrolls the outer loop + and processes the rows in parallel, performing in order reductions on the inner loop. */ +/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <math.h> + +#define NUM_ELEMS(TYPE) (int)(5 * (256 / sizeof (TYPE)) + 3) + +/* TODO: Test with inner loop = n * NUM_ELEMS(TYPE). 
*/ +#define DEF_REDUC_PLUS(TYPE)\ +void reduc_plus_##TYPE (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\ +{\ + for (int i = 0; i < n; i++)\ + {\ + r[i] = 0;\ + for (int j = 0; j < NUM_ELEMS(TYPE); j++)\ + r[i] += a[i][j];\ + }\ +}\ + +DEF_REDUC_PLUS (float) +DEF_REDUC_PLUS (double) + +#define DEF_REDUC_MAXMIN(TYPE,FUN)\ +void reduc_##FUN (TYPE (*__restrict__ a)[NUM_ELEMS(TYPE)], TYPE *__restrict__ r, int n)\ +{\ + for (int i = 0; i < n; i++)\ + {\ + r[i] = a[i][0];\ + for (int j = 0; j < NUM_ELEMS(TYPE); j++)\ + r[i] = FUN (a[i][j], r[i]);\ + }\ +}\ + +DEF_REDUC_MAXMIN (float, fmaxf) +DEF_REDUC_MAXMIN (double, fmax) +DEF_REDUC_MAXMIN (float, fminf) +DEF_REDUC_MAXMIN (double, fmin) + +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s} 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d} 1 { xfail *-*-* } } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C new file mode 100644 index 00000000000..86a930c7d33 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_2_run.C @@ -0,0 +1,59 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_reduc_strict_2.C" +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#define NROWS 5 + +#define DEF_INIT_VECTOR(TYPE)\ + TYPE mat_##TYPE[NROWS][NUM_ELEMS (TYPE)];\ + TYPE r_##TYPE[NROWS];\ + for (int i = 0; i < NROWS; i++)\ + 
for (int j = 0; j < NUM_ELEMS (TYPE); j++ )\ + mat_##TYPE[i][j] = i + (j * 2) * (j & 1 ? 1 : -1);\ + +#define TEST_REDUC_PLUS(TYPE) reduc_plus_##TYPE (mat_##TYPE, r_##TYPE, NROWS); +#define TEST_REDUC_MAXF reduc_fmaxf (mat_float, r_float, NROWS); +#define TEST_REDUC_MAX reduc_fmax (mat_double, r_double, NROWS); +#define TEST_REDUC_MINF reduc_fminf (mat_float, r_float, NROWS); +#define TEST_REDUC_MIN reduc_fmin (mat_double, r_double, NROWS); + +#define SUM_VECTOR(RES, TYPE)\ + for (int i = 0; i < NROWS; i++)\ + (RES) += r_##TYPE[i]; + +#define SUM_FLOAT_RESULT(RES)\ + SUM_VECTOR (RES, float);\ + SUM_VECTOR (RES, double);\ + +int main () +{ + double resultF = 0.0; + DEF_INIT_VECTOR (float) + DEF_INIT_VECTOR (double) + + TEST_REDUC_PLUS (float) + TEST_REDUC_PLUS (double) + SUM_FLOAT_RESULT (resultF); + + TEST_REDUC_MAXF + TEST_REDUC_MAX + SUM_FLOAT_RESULT (resultF); + + TEST_REDUC_MINF + TEST_REDUC_MIN + SUM_FLOAT_RESULT (resultF); + + if (resultF != double (2460)) + { + fprintf (stderr, "resultF = %1.16lf\n", resultF); + abort (); + } + + return 0; +} + + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C new file mode 100644 index 00000000000..338aa614b47 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_strict_3.C @@ -0,0 +1,121 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++11 -O2 -ftree-vectorize -fno-inline -march=armv8-a+sve -msve-vector-bits=256 -fdump-tree-vect-details" } */ + +double mat[100][4]; +double mat2[100][8]; +double mat3[100][12]; +double mat4[100][3]; + +double slp_reduc_plus (int n) +{ + double tmp = 0.0; + for (int i = 0; i < n; i++) + { + tmp = tmp + mat[i][0]; + tmp = tmp + mat[i][1]; + tmp = tmp + mat[i][2]; + tmp = tmp + mat[i][3]; + } + return tmp; +} + +double slp_reduc_plus2 (int n) +{ + double tmp = 0.0; + for (int i = 0; i < n; i++) + { + tmp = tmp + mat2[i][0]; + tmp = tmp + mat2[i][1]; + tmp = tmp + mat2[i][2]; + tmp = tmp + mat2[i][3]; + tmp = 
tmp + mat2[i][4]; + tmp = tmp + mat2[i][5]; + tmp = tmp + mat2[i][6]; + tmp = tmp + mat2[i][7]; + } + return tmp; +} + +double slp_reduc_plus3 (int n) +{ + double tmp = 0.0; + for (int i = 0; i < n; i++) + { + tmp = tmp + mat3[i][0]; + tmp = tmp + mat3[i][1]; + tmp = tmp + mat3[i][2]; + tmp = tmp + mat3[i][3]; + tmp = tmp + mat3[i][4]; + tmp = tmp + mat3[i][5]; + tmp = tmp + mat3[i][6]; + tmp = tmp + mat3[i][7]; + tmp = tmp + mat3[i][8]; + tmp = tmp + mat3[i][9]; + tmp = tmp + mat3[i][10]; + tmp = tmp + mat3[i][11]; + } + return tmp; +} + +void slp_non_chained_reduc (int n, double * __restrict__ out) +{ + for (int i = 0; i < 3; i++) + out[i] = 0; + + for (int i = 0; i < n; i++) + { + out[0] = out[0] + mat4[i][0]; + out[1] = out[1] + mat4[i][1]; + out[2] = out[2] + mat4[i][2]; + } +} + +/* Strict FP reductions shouldn't be used for the outer loops, only the + inner loops. */ + +float double_reduc1 (float (*__restrict__ i)[16]) +{ + float l = 0; + + for (int a = 0; a < 8; a++) + for (int b = 0; b < 8; b++) + l += i[b][a]; + return l; +} + +float double_reduc2 (float *__restrict__ i) +{ + float l = 0; + + for (int a = 0; a < 8; a++) + for (int b = 0; b < 16; b++) + { + l += i[b * 4]; + l += i[b * 4 + 1]; + l += i[b * 4 + 2]; + l += i[b * 4 + 3]; + } + return l; +} + +float double_reduc3 (float *__restrict__ i, float *__restrict__ j) +{ + float k = 0, l = 0; + + for (int a = 0; a < 8; a++) + for (int b = 0; b < 8; b++) + { + k += i[b]; + l += j[b]; + } + return l * k; +} + +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */ +/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3. Each one + is reported three times, once for SVE, once for 128-bit AdvSIMD and once + for 64-bit AdvSIMD. 
*/ +/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */ +/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3. */ +/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c new file mode 100644 index 00000000000..9307200fb05 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_revb_1.c @@ -0,0 +1,35 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size (32))); + +#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) +#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) +#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) + +#define INDEX_32 v32qi + +#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ + TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); \ + } + +#define TEST_ALL(T) \ + T (v32qi, 32, 2) \ + T (v32qi, 32, 4) \ + T (v32qi, 32, 8) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */ +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c new file mode 100644 index 00000000000..fb238373c4e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_revh_1.c @@ -0,0 +1,36 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include 
<stdint.h> + +typedef uint16_t v16hi __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) +#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) +#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) + +#define INDEX_16 v16hi + +#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ + TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); \ + } + +#define TEST_ALL(T) \ + T (v16hi, 16, 2) \ + T (v16hi, 16, 4) \ + T (v16hf, 16, 2) \ + T (v16hf, 16, 4) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c new file mode 100644 index 00000000000..4834e2c2b01 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_revw_1.c @@ -0,0 +1,31 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef uint32_t v8si __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); + +#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) + +#define INDEX_8 v8si + +#define PERMUTE(TYPE, NUNITS, REV_NUNITS) \ + TYPE permute_##TYPE##_##REV_NUNITS (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (0, REV_NUNITS - 1) })); \ + } + +#define TEST_ALL(T) \ + T (v8si, 8, 2) \ + T 
(v8sf, 8, 2) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c new file mode 100644 index 00000000000..2270be2bd29 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1.c @@ -0,0 +1,109 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#define SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE)\ +void scatter_store1##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + dst[i * STRIDE] = src[i];\ +} + +#define SCATTER_STORE2(OBJTYPE,STRIDETYPE)\ +void scatter_store2##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + dst[i * stride] = src[i];\ +} + +#define SCATTER_STORE3(OBJTYPE,STRIDETYPE)\ +void scatter_store3s5##OBJTYPE##STRIDETYPE\ + (OBJTYPE * restrict dst, OBJTYPE * restrict s1, OBJTYPE * restrict s2,\ + OBJTYPE * restrict s3, OBJTYPE * restrict s4, OBJTYPE * restrict s5,\ + STRIDETYPE count)\ +{\ + const STRIDETYPE STRIDE = 5;\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + dst[0 + (i * STRIDE)] = s1[i];\ + dst[4 + (i * STRIDE)] = s5[i];\ + dst[1 + (i * STRIDE)] = s2[i];\ + dst[2 + (i * STRIDE)] = s3[i];\ + dst[3 + (i * STRIDE)] = s4[i];\ + }\ +} + +#define SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE)\ +void scatter_store4##OBJTYPE##STRIDETYPE##STRIDE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict src,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + *dst = *src;\ + dst += STRIDE;\ + src += 1;\ + }\ +} + +#define SCATTER_STORE5(OBJTYPE,STRIDETYPE)\ +void scatter_store5##OBJTYPE##STRIDETYPE (OBJTYPE * restrict dst,\ + OBJTYPE * restrict 
src,\ + STRIDETYPE stride,\ + STRIDETYPE count)\ +{\ + for (STRIDETYPE i=0; i<count; i++)\ + {\ + *dst = *src;\ + dst += stride;\ + src += 1;\ + }\ +} + +SCATTER_STORE1 (double, long, 5) +SCATTER_STORE1 (double, long, 8) +SCATTER_STORE1 (double, long, 21) +SCATTER_STORE1 (double, long, 1009) + +SCATTER_STORE1 (float, int, 5) +SCATTER_STORE1 (float, int, 8) +SCATTER_STORE1 (float, int, 21) +SCATTER_STORE1 (float, int, 1009) + +SCATTER_STORE2 (double, long) +SCATTER_STORE2 (float, int) + +SCATTER_STORE3 (double, long) +SCATTER_STORE3 (float, int) + +SCATTER_STORE4 (double, long, 5) +/* NOTE: We can't vectorize SCATTER_STORE4 (float, int, 5) because we can't + prove that the offsets used for the gather load won't overflow. */ + +SCATTER_STORE5 (double, long) +SCATTER_STORE5 (float, int) + +/* Widened forms. */ +SCATTER_STORE1 (double, int, 5) +SCATTER_STORE1 (double, int, 8) +SCATTER_STORE1 (double, short, 5) +SCATTER_STORE1 (double, short, 8) + +SCATTER_STORE1 (float, short, 5) +SCATTER_STORE1 (float, short, 8) + +SCATTER_STORE2 (double, int) +SCATTER_STORE2 (float, short) + +SCATTER_STORE4 (double, int, 5) +SCATTER_STORE4 (float, short, 5) + +SCATTER_STORE5 (double, int) + +/* { dg-final { scan-assembler-times "st1d\\tz\[0-9\]+.d, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.d\\\]" 19 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw 2\\\]" 12 } } */ +/* { dg-final { scan-assembler-times "st1w\\tz\[0-9\]+.s, p\[0-9\]+, \\\[x\[0-9\]+, z\[0-9\]+.s, sxtw\\\]" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c new file mode 100644 index 00000000000..4d8cddc510f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_scatter_store_1_run.c @@ -0,0 +1,155 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <unistd.h> + +extern void abort 
(void); +extern void *memset(void *, int, size_t); + +#include "sve_scatter_store_1.c" + +#define NUM_SRC_ELEMS 13 +#define NUM_DST_ELEMS(STRIDE) (NUM_SRC_ELEMS * STRIDE) + +#define TEST_SCATTER_STORE_COMMON1(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + src[i] = i;\ + FUN##OBJTYPE##STRIDETYPE##STRIDE (dst, src, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + if (dst[i * STRIDE] != i)\ + abort ();\ +} + +#define TEST_SCATTER_STORE_COMMON2(FUN,OBJTYPE,STRIDETYPE,STRIDE)\ +{\ + OBJTYPE real_src[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (STRIDE)]\ + __attribute__((aligned (32)));\ + memset (real_src, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst, 0, (1 + NUM_DST_ELEMS (STRIDE)) * sizeof (OBJTYPE));\ + OBJTYPE *src = &real_src[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + src[i] = i;\ + FUN##OBJTYPE##STRIDETYPE (dst, src, STRIDE, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + if (dst[i * STRIDE] != i)\ + abort ();\ +} + +#define TEST_SCATTER_STORE1(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_SCATTER_STORE_COMMON1 (scatter_store1, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_SCATTER_STORE2(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_SCATTER_STORE_COMMON2 (scatter_store2, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_SCATTER_STORE3(OBJTYPE,STRIDETYPE)\ +{\ + OBJTYPE real_src1[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src2[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src3[1 + NUM_SRC_ELEMS]\ + 
__attribute__((aligned (32)));\ + OBJTYPE real_src4[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_src5[1 + NUM_SRC_ELEMS]\ + __attribute__((aligned (32)));\ + OBJTYPE real_dst[1 + NUM_DST_ELEMS (5)]\ + __attribute__((aligned (32)));\ + memset (real_src1, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_src2, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_src3, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_src4, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_src5, 0, (1 + NUM_SRC_ELEMS) * sizeof (OBJTYPE));\ + memset (real_dst, 0, (1 + NUM_DST_ELEMS (5)) * sizeof (OBJTYPE));\ + OBJTYPE *src1 = &real_src1[1];\ + OBJTYPE *src2 = &real_src2[1];\ + OBJTYPE *src3 = &real_src3[1];\ + OBJTYPE *src4 = &real_src4[1];\ + OBJTYPE *src5 = &real_src5[1];\ + OBJTYPE *dst = &real_dst[1];\ + for (STRIDETYPE i = 0; i < NUM_SRC_ELEMS; i++)\ + {\ + STRIDETYPE base = i * 5;\ + src1[i] = base;\ + src2[i] = base + 1;\ + src3[i] = base + 2;\ + src4[i] = base + 3;\ + src5[i] = base + 4;\ + }\ + scatter_store3s5##OBJTYPE##STRIDETYPE \ + (dst, src1, src2, src3, src4, src5, NUM_SRC_ELEMS); \ + for (STRIDETYPE i = 0; i < NUM_DST_ELEMS (5); i++)\ + if (dst[i] != i)\ + abort ();\ +} + +#define TEST_SCATTER_STORE4(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_SCATTER_STORE_COMMON1 (scatter_store4, OBJTYPE, STRIDETYPE, STRIDE) + +#define TEST_SCATTER_STORE5(OBJTYPE,STRIDETYPE,STRIDE) \ + TEST_SCATTER_STORE_COMMON2 (scatter_store5, OBJTYPE, STRIDETYPE, STRIDE) + +int __attribute__ ((optimize (1))) +main () +{ + TEST_SCATTER_STORE1 (double, long, 5); + TEST_SCATTER_STORE1 (double, long, 8); + TEST_SCATTER_STORE1 (double, long, 21); + + TEST_SCATTER_STORE1 (float, int, 5); + TEST_SCATTER_STORE1 (float, int, 8); + TEST_SCATTER_STORE1 (float, int, 21); + + TEST_SCATTER_STORE2 (double, long, 5); + TEST_SCATTER_STORE2 (double, long, 8); + TEST_SCATTER_STORE2 (double, long, 21); + + TEST_SCATTER_STORE2 (float, int, 5); + TEST_SCATTER_STORE2 
(float, int, 8); + TEST_SCATTER_STORE2 (float, int, 21); + + TEST_SCATTER_STORE3 (double, long); + TEST_SCATTER_STORE3 (float, int); + + TEST_SCATTER_STORE4 (double, long, 5); + + TEST_SCATTER_STORE5 (double, long, 5); + TEST_SCATTER_STORE5 (float, int, 5); + + /* Widened forms. */ + TEST_SCATTER_STORE1 (double, int, 5) + TEST_SCATTER_STORE1 (double, int, 8) + TEST_SCATTER_STORE1 (double, short, 5) + TEST_SCATTER_STORE1 (double, short, 8) + + TEST_SCATTER_STORE1 (float, short, 5) + TEST_SCATTER_STORE1 (float, short, 8) + + TEST_SCATTER_STORE2 (double, int, 5); + TEST_SCATTER_STORE2 (double, int, 8); + TEST_SCATTER_STORE2 (double, int, 21); + + TEST_SCATTER_STORE2 (float, short, 5); + TEST_SCATTER_STORE2 (float, short, 8); + TEST_SCATTER_STORE2 (float, short, 21); + + TEST_SCATTER_STORE4 (double, int, 5); + TEST_SCATTER_STORE4 (float, short, 5); + + TEST_SCATTER_STORE5 (double, int, 5); + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c new file mode 100644 index 00000000000..b19cd7a3161 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_shift_1.c @@ -0,0 +1,108 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_REG_OPS(TYPE) \ +void ashiftr_##TYPE (TYPE *dst, TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src; \ +} \ +void lshiftr_##TYPE (u##TYPE *dst, u##TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src; \ +} \ +void lshiftl_##TYPE (u##TYPE *dst, u##TYPE src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << src; \ +} \ +void vashiftr_##TYPE (TYPE *dst, TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src[i]; \ +} \ +void vlshiftr_##TYPE (u##TYPE *dst, u##TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> src[i]; \ +} \ 
+void vlshiftl_##TYPE (u##TYPE *dst, u##TYPE *src, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << src[i]; \ +} + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ +void vashiftr_imm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> VALUE; \ +} \ +void vlshiftr_imm_##NAME##_##TYPE (u##TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] >> VALUE; \ +} \ +void vlshiftl_imm_##NAME##_##TYPE (u##TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = dst[i] << VALUE; \ +} + +DO_REG_OPS (int32_t); +DO_REG_OPS (int64_t); + +DO_IMMEDIATE_OPS (0, int8_t, 0); +DO_IMMEDIATE_OPS (5, int8_t, 5); +DO_IMMEDIATE_OPS (7, int8_t, 7); + +DO_IMMEDIATE_OPS (0, int16_t, 0); +DO_IMMEDIATE_OPS (5, int16_t, 5); +DO_IMMEDIATE_OPS (15, int16_t, 15); + +DO_IMMEDIATE_OPS (0, int32_t, 0); +DO_IMMEDIATE_OPS (5, int32_t, 5); +DO_IMMEDIATE_OPS (31, int32_t, 31); + +DO_IMMEDIATE_OPS (0, int64_t, 0); +DO_IMMEDIATE_OPS (5, int64_t, 5); +DO_IMMEDIATE_OPS (63, int64_t, 63); + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tasr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_single_1.c new file mode 100644 index 00000000000..f7aeed06907 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_1.c @@ 
-0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -march=armv8-a+sve -msve-vector-bits=256" } */ + +#ifndef N +#define N 32 +#endif + +#include <stdint.h> + +#define TEST_LOOP(TYPE, VALUE) \ + void \ + test_##TYPE (TYPE *data) \ + { \ + _Pragma ("omp simd") \ + for (int i = 0; i < N / sizeof (TYPE); ++i) \ + data[i] = VALUE; \ + } + +TEST_LOOP (uint8_t, 1) +TEST_LOOP (int8_t, 2) +TEST_LOOP (uint16_t, 3) +TEST_LOOP (int16_t, 4) +TEST_LOOP (uint32_t, 5) +TEST_LOOP (int32_t, 6) +TEST_LOOP (uint64_t, 7) +TEST_LOOP (int64_t, 8) +TEST_LOOP (_Float16, 1.0f) +TEST_LOOP (float, 2.0f) +TEST_LOOP (double, 3.0) + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { 
dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ + +/* { dg-final { scan-assembler-not {\twhile} } } */ +/* { dg-final { scan-assembler-not {\tb} } } */ +/* { dg-final { scan-assembler-not {\tcmp} } } */ +/* { dg-final { scan-assembler-not {\tindex} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_2.c b/gcc/testsuite/gcc.target/aarch64/sve_single_2.c new file mode 100644 index 00000000000..7daea6262d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_2.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -march=armv8-a+sve -msve-vector-bits=512" } */ + +#define N 64 +#include "sve_single_1.c" + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +/* { dg-final { scan-assembler-times 
{\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ + +/* { dg-final { scan-assembler-not {\twhile} } } */ +/* { dg-final { scan-assembler-not {\tb} } } */ +/* { dg-final { scan-assembler-not {\tcmp} } } */ +/* { dg-final { scan-assembler-not {\tindex} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_3.c b/gcc/testsuite/gcc.target/aarch64/sve_single_3.c new file mode 100644 index 00000000000..e779d6c50d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_3.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -march=armv8-a+sve -msve-vector-bits=1024" } */ + +#define N 128 +#include "sve_single_1.c" + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ 
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ + +/* { dg-final { scan-assembler-not {\twhile} } } */ +/* { dg-final { scan-assembler-not {\tb} } } */ +/* { dg-final { scan-assembler-not {\tcmp} } } */ +/* { dg-final { scan-assembler-not {\tindex} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_single_4.c b/gcc/testsuite/gcc.target/aarch64/sve_single_4.c new file mode 100644 index 00000000000..7c8b3015551 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_single_4.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fopenmp-simd -march=armv8-a+sve -msve-vector-bits=2048" } */ + +#define N 256 +#include "sve_single_1.c" + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #3\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #4\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #6\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #7\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #15360\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ + +/* { dg-final { scan-assembler-not {\twhile} } } */ +/* { dg-final { scan-assembler-not {\tb} } } */ +/* { dg-final { scan-assembler-not {\tcmp} } } */ +/* { dg-final { scan-assembler-not {\tindex} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c new file mode 100644 index 00000000000..460359e4be3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1.c @@ -0,0 +1,55 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += b; \ + a[i * 2 + 1] += c; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* We should use one DUP for each of the 8-, 16- and 32-bit types. + We should use two DUPs for each of the three 64-bit types. */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-not {\tzip2\t} } } */ + +/* The loop should be fully-masked. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c new file mode 100644 index 00000000000..7dd3640966a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i] += 1; \ + b[i * 4] += 2; \ + b[i * 4 + 1] += 3; \ + b[i * 4 + 2] += 4; \ + b[i * 4 + 3] += 5; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 15 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* We should use WHILEs for all accesses. */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 20 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 20 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 30 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 30 } } */ + +/* 6 for the 8-bit types and 2 for the 16-bit types. */ +/* { dg-final { scan-assembler-times {\tuqdecb\t} 8 } } */ +/* 4 for the 16-bit types and 3 for the 32-bit types. */ +/* { dg-final { scan-assembler-times {\tuqdech\t} 7 } } */ +/* 6 for the 32-bit types and 3 for the 64-bit types. 
*/ +/* { dg-final { scan-assembler-times {\tuqdecw\t} 9 } } */ +/* { dg-final { scan-assembler-times {\tuqdecd\t} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c new file mode 100644 index 00000000000..c1aeaf9b06e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_10_run.c @@ -0,0 +1,54 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_10.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N2], b[N2 * 4]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i * 4] = i * 3 + i % 7; \ + b[i * 4 + 1] = i * 5 + i % 9; \ + b[i * 4 + 2] = i * 7 + i % 11; \ + b[i * 4 + 3] = i * 9 + i % 13; \ + } \ + vec_slp_##TYPE (a, b, N1); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE orig_a = i * 2 + i % 5; \ + TYPE orig_b1 = i * 3 + i % 7; \ + TYPE orig_b2 = i * 5 + i % 9; \ + TYPE orig_b3 = i * 7 + i % 11; \ + TYPE orig_b4 = i * 9 + i % 13; \ + TYPE expected_a = orig_a; \ + TYPE expected_b1 = orig_b1; \ + TYPE expected_b2 = orig_b2; \ + TYPE expected_b3 = orig_b3; \ + TYPE expected_b4 = orig_b4; \ + if (i < N1) \ + { \ + expected_a += 1; \ + expected_b1 += 2; \ + expected_b2 += 3; \ + expected_b3 += 4; \ + expected_b4 += 5; \ + } \ + if (a[i] != expected_a \ + || b[i * 4] != expected_b1 \ + || b[i * 4 + 1] != expected_b2 \ + || b[i * 4 + 2] != expected_b3 \ + || b[i * 4 + 3] != expected_b4) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c new file mode 100644 index 00000000000..3db5769deed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include 
<stdint.h> + +#define VEC_PERM(TYPE1, TYPE2) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \ + TYPE2 *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += 1; \ + a[i * 2 + 1] += 2; \ + b[i * 4] += 3; \ + b[i * 4 + 1] += 4; \ + b[i * 4 + 2] += 5; \ + b[i * 4 + 3] += 6; \ + } \ +} + +#define TEST_ALL(T) \ + T (int16_t, uint8_t) \ + T (uint16_t, int8_t) \ + T (int32_t, uint16_t) \ + T (uint32_t, int16_t) \ + T (float, uint16_t) \ + T (int64_t, float) \ + T (uint64_t, int32_t) \ + T (double, uint32_t) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 5 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 5 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* We should use the same WHILEs for both accesses. 
*/ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-not {\twhilelo\tp[0-7]\.d} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c new file mode 100644 index 00000000000..c302ef6fb76 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_11_run.c @@ -0,0 +1,45 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_11.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE1, TYPE2) \ + { \ + TYPE1 a[N2]; \ + TYPE2 b[N2 * 2]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i * 2] = i * 3 + i % 7; \ + b[i * 2 + 1] = i * 5 + i % 9; \ + } \ + vec_slp_##TYPE1##_##TYPE2 (a, b, N1 / 2); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE1 orig_a = i * 2 + i % 5; \ + TYPE2 orig_b1 = i * 3 + i % 7; \ + TYPE2 orig_b2 = i * 5 + i % 9; \ + TYPE1 expected_a = orig_a; \ + TYPE2 expected_b1 = orig_b1; \ + TYPE2 expected_b2 = orig_b2; \ + if (i < N1) \ + { \ + expected_a += i & 1 ? 2 : 1; \ + expected_b1 += i & 1 ? 5 : 3; \ + expected_b2 += i & 1 ? 
6 : 4; \ + } \ + if (a[i] != expected_a \ + || b[i * 2] != expected_b1 \ + || b[i * 2 + 1] != expected_b2) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c new file mode 100644 index 00000000000..9afe7e59ef2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define N1 (19 * 2) + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b) \ +{ \ + for (int i = 0; i < N1; ++i) \ + { \ + a[i] += 1; \ + b[i * 4] += 2; \ + b[i * 4 + 1] += 3; \ + b[i * 4 + 2] += 4; \ + b[i * 4 + 3] += 5; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 15 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* We should use WHILEs for all accesses. 
*/ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 20 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 20 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 30 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 30 } } */ + +/* 6 for the 8-bit types and 2 for the 16-bit types. */ +/* { dg-final { scan-assembler-times {\tuqdecb\t} 8 } } */ +/* 4 for the 16-bit types and 3 for the 32-bit types. */ +/* { dg-final { scan-assembler-times {\tuqdech\t} 7 } } */ +/* 6 for the 32-bit types and 3 for the 64-bit types. */ +/* { dg-final { scan-assembler-times {\tuqdecw\t} 9 } } */ +/* { dg-final { scan-assembler-times {\tuqdecd\t} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c new file mode 100644 index 00000000000..8c854d4207c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_12_run.c @@ -0,0 +1,53 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_12.c" + +#define N2 (31 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N2], b[N2 * 4]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i * 4] = i * 3 + i % 7; \ + b[i * 4 + 1] = i * 5 + i % 9; \ + b[i * 4 + 2] = i * 7 + i % 11; \ + b[i * 4 + 3] = i * 9 + i % 13; \ + } \ + vec_slp_##TYPE (a, b); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE orig_a = i * 2 + i % 5; \ + TYPE orig_b1 = i * 3 + i % 7; \ + TYPE orig_b2 = i * 5 + i % 9; \ + TYPE orig_b3 = i * 7 + i % 11; \ + TYPE orig_b4 = i * 9 + i % 13; \ + TYPE expected_a = orig_a; \ + TYPE expected_b1 = orig_b1; \ + TYPE expected_b2 = orig_b2; \ + TYPE expected_b3 = orig_b3; \ + TYPE expected_b4 = orig_b4; \ + if (i < N1) \ + { \ + expected_a += 1; \ + expected_b1 += 2; \ + expected_b2 += 3; \ + expected_b3 += 4; \ + expected_b4 += 5; \ + } \ + if (a[i] != expected_a \ + || b[i * 4] != expected_b1 \ + || b[i 
* 4 + 1] != expected_b2 \ + || b[i * 4 + 2] != expected_b3 \ + || b[i * 4 + 3] != expected_b4) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c new file mode 100644 index 00000000000..f3ecbd7adbc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* The cost model thinks that the double loop isn't a win for SVE-128. */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable -fno-vect-cost-model" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + TYPE res = 0; \ + for (int i = 0; i < n; ++i) \ + { \ + res += a[i * 2] * 3; \ + res += a[i * 2 + 1] * 5; \ + } \ + return res; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* ??? We don't treat the int8_t and int16_t loops as reductions. */ +/* ??? We don't treat the uint loops as SLP. */ +/* The loop should be fully-masked. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 2 } } */ +/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tfadd\n} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c new file mode 100644 index 00000000000..282f1ae2310 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_slp_13.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N2]; \ + TYPE expected 
= 0; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + if (i < N1) \ + expected += a[i] * (i & 1 ? 5 : 3); \ + asm volatile (""); \ + } \ + if (vec_slp_##TYPE (a, N1 / 2) != expected) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_14.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_14.c new file mode 100644 index 00000000000..b4926fc63f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_14.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + TYPE a1 = a[i * 2]; \ + TYPE a2 = a[i * 2 + 1]; \ + TYPE b1 = b[i * 2]; \ + TYPE b2 = b[i * 2 + 1]; \ + a[i * 2] = b1 > 1 ? a1 / b1 : a1; \ + a[i * 2 + 1] = b2 > 2 ? a2 / b2 : a2; \ + } \ +} + +#define TEST_ALL(T) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. The load XFAILs for fixed-length + SVE account for extra loads from the constant pool. */ +/* { dg-final { scan-assembler-times {\tld1w\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 6 { xfail { aarch64_sve && { ! 
vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d} 1 } } */ +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_14_run.c new file mode 100644 index 00000000000..05b9413b283 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_14_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_14.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N2], b[N2]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i] = i % 11; \ + } \ + vec_slp_##TYPE (a, b, N1 / 2); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE orig_a = i * 2 + i % 5; \ + TYPE orig_b = i % 11; \ + TYPE expected_a = orig_a; \ + if (i < N1 && orig_b > (i & 1 ? 
2 : 1)) \ + expected_a /= orig_b; \ + if (a[i] != expected_a || b[i] != orig_b) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c new file mode 100644 index 00000000000..6c1b38277ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_1.c" + +#define N (103 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 3, 11 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, b[0], b[1], N / 2); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 2]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c new file mode 100644 index 00000000000..3e71596021f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2.c @@ -0,0 +1,55 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += 10; \ + a[i * 2 + 1] += 17; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */ +/* { dg-final { 
scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-not {\tzip2\t} } } */ + +/* The loop should be fully-masked. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c new file mode 100644 index 00000000000..7d4d5e8ca3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_2.c" + +#define N (103 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 10, 17 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, N / 2); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 2]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + 
TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c new file mode 100644 index 00000000000..3ac0eebf422 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 4] += 41; \ + a[i * 4 + 1] += 25; \ + a[i * 4 + 2] += 31; \ + a[i * 4 + 3] += 62; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* 1 for each 8-bit type. */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ +/* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double. */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 12 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */ +/* The 32-bit types need 1 ZIP1 each. The 64-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) + ZIP1 ZIP2. */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ + +/* The loop should be fully-masked. The 64-bit types need two loads + and stores each. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 12 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec[bhw]\t} } } */ +/* { dg-final { scan-assembler-times {\tuqdecd\t} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c new file mode 100644 index 00000000000..7306355b873 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_3.c" + +#define N (77 * 4) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[4] = { 41, 25, 31, 62 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, N / 4); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 4]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c new file mode 100644 index 00000000000..b0890fd934b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4.c 
@@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 8] += 99; \ + a[i * 8 + 1] += 11; \ + a[i * 8 + 2] += 17; \ + a[i * 8 + 3] += 80; \ + a[i * 8 + 4] += 63; \ + a[i * 8 + 5] += 37; \ + a[i * 8 + 6] += 24; \ + a[i * 8 + 7] += 81; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type + and 8 for double. */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 26 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #80\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #63\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */ +/* The 16-bit types need 1 ZIP1 each. The 32-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) + ZIP1 ZIP2 + + and the 64-bit types need: + + ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away) + ZIP1 ZIP2 ZIP1 ZIP2 + + ZIP1 ZIP2 ZIP1 ZIP2. */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 35 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */ + +/* The loop should be fully-masked. 
The 32-bit types need two loads + and stores each and the 64-bit types need four. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 12 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 12 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 24 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec[bh]\t} } } */ +/* We use UQDECW instead of UQDECD ..., MUL #2. 
*/ +/* { dg-final { scan-assembler-times {\tuqdecw\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tuqdecd\t} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c new file mode 100644 index 00000000000..2eb2a5ff07e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c @@ -0,0 +1,27 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_4.c" + +#define N (59 * 8) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, N / 8); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 8]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c new file mode 100644 index 00000000000..0f8cf624e20 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable -ffast-math" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + TYPE x0 = b[0]; \ + TYPE x1 = b[1]; \ + for (int i = 0; i < n; ++i) \ + { \ + x0 += a[i * 2]; \ + x1 += a[i * 2 + 1]; \ + } \ + b[0] = x0; \ + b[1] = x1; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* ??? We don't think it's worth using SLP for the 64-bit loops and fall + back to the less efficient non-SLP implementation instead. */ +/* ??? 
At present we don't treat the int8_t and int16_t loops as + reductions. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not {\tld2b\t} } } */ +/* { dg-final { scan-assembler-not {\tld2h\t} } } */ +/* { dg-final { scan-assembler-not {\tld2w\t} } } */ +/* { dg-final { scan-assembler-not {\tld2d\t} { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ + +/* Should be 4, if we used reductions for int8_t and int16_t. 
*/ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c new file mode 100644 index 00000000000..476b40cb0e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5_run.c @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_slp_5.c" + +#define N (141 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 40, 22 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, b, N / 2); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + for (unsigned int i = 0; i < N; i += 2) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + asm volatile (""); \ + } \ + if (x0 != b[0] || x1 != b[1]) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c new file mode 100644 index 00000000000..8cdceb57dc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable -ffast-math" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + TYPE x0 = b[0]; \ + TYPE x1 = b[1]; \ + TYPE x2 = b[2]; \ + for (int i = 0; i < n; ++i) \ + { \ + x0 += a[i * 3]; \ + x1 += a[i * 3 + 1]; \ + x2 += a[i * 3 + 2]; \ + } \ + b[0] = x0; \ + b[1] = x1; \ + b[2] = x2; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + 
T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* These loops can't use SLP. */ +/* { dg-final { scan-assembler-not {\tld1b\t} } } */ +/* { dg-final { scan-assembler-not {\tld1h\t} } } */ +/* { dg-final { scan-assembler-not {\tld1w\t} } } */ +/* { dg-final { scan-assembler-not {\tld1d\t} } } */ +/* { dg-final { scan-assembler {\tld3b\t} } } */ +/* { dg-final { scan-assembler {\tld3h\t} } } */ +/* { dg-final { scan-assembler {\tld3w\t} } } */ +/* { dg-final { scan-assembler {\tld3d\t} } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c new file mode 100644 index 00000000000..a9ca327c907 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_6_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_slp_6.c" + +#define N (77 * 3) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[3] = { 40, 22, 75 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, b, N / 3); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + TYPE x2 = 75; \ + for (unsigned int i = 0; i < N; i += 3) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + x2 += a[i + 2]; \ + asm volatile (""); \ + } \ + if (x0 != b[0] || x1 != b[1] || x2 != b[2]) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c new file mode 100644 index 00000000000..4dc9fafcdde --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable -ffast-math" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ 
+vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + TYPE x0 = b[0]; \ + TYPE x1 = b[1]; \ + TYPE x2 = b[2]; \ + TYPE x3 = b[3]; \ + for (int i = 0; i < n; ++i) \ + { \ + x0 += a[i * 4]; \ + x1 += a[i * 4 + 1]; \ + x2 += a[i * 4 + 2]; \ + x3 += a[i * 4 + 3]; \ + } \ + b[0] = x0; \ + b[1] = x1; \ + b[2] = x2; \ + b[3] = x3; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* We can't use SLP for the 64-bit loops, since the number of reduction + results might be greater than the number of elements in the vector. + Otherwise we have two loads per loop, one for the initial vector + and one for the loop body. */ +/* ??? At present we don't treat the int8_t and int16_t loops as + reductions. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tld4b\t} } } */ +/* { dg-final { scan-assembler-not {\tld4h\t} } } */ +/* { dg-final { scan-assembler-not {\tld4w\t} } } */ +/* { dg-final { scan-assembler-not {\tld1d\t} } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */ +/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, 
p[0-7], z[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ + +/* Should be 4, if we used reductions for int8_t and int16_t. */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c new file mode 100644 index 00000000000..12446972fde --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7_run.c @@ -0,0 +1,34 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -ffast-math" } */ + +#include "sve_slp_7.c" + +#define N (54 * 4) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[4] = { 40, 22, 75, 19 }; \ + for (unsigned int i = 0; i < N; ++i) \ + a[i] = i * 2 + i % 5; \ + vec_slp_##TYPE (a, b, N / 4); \ + TYPE x0 = 40; \ + TYPE x1 = 22; \ + TYPE x2 = 75; \ + TYPE x3 = 19; \ + for (unsigned int i = 0; i < N; i += 4) \ + { \ + x0 += a[i]; \ + x1 += a[i + 1]; \ + x2 += a[i + 2]; \ + x3 += a[i + 3]; \ + asm volatile (""); \ + } \ + if (x0 != b[0] || x1 != b[1] || x2 != b[2] || x3 != b[3]) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c new file mode 100644 index 00000000000..caae4528d82 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE (TYPE 
*restrict a, TYPE *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += 1; \ + a[i * 2 + 1] += 2; \ + b[i * 4] += 3; \ + b[i * 4 + 1] += 4; \ + b[i * 4 + 2] += 5; \ + b[i * 4 + 3] += 6; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. The load XFAILs for fixed-length + SVE account for extra loads from the constant pool. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 9 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 9 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 9 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 9 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* We should use WHILEs for the accesses to "a" and ZIPs for the accesses + to "b". 
*/ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tp[0-7]\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tp[0-7]\.d} 3 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tp[0-7]\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tp[0-7]\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tp[0-7]\.d} 3 } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c new file mode 100644 index 00000000000..2717ca62de1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_8_run.c @@ -0,0 +1,44 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_8.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N2], b[N2 * 2]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i * 2] = i * 3 + i % 7; \ + b[i * 2 + 1] = i * 5 + i % 9; \ + } \ + vec_slp_##TYPE (a, b, N1 / 2); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE orig_a = i * 2 + i % 5; \ + TYPE orig_b1 = i * 3 + i % 7; \ + TYPE orig_b2 = i * 5 + i % 9; \ + TYPE expected_a = orig_a; \ + TYPE expected_b1 = orig_b1; \ + TYPE expected_b2 = orig_b2; \ + if (i < N1) \ + { \ + expected_a += i & 1 ? 2 : 1; \ + expected_b1 += i & 1 ? 5 : 3; \ + expected_b2 += i & 1 ? 
6 : 4; \ + } \ + if (a[i] != expected_a \ + || b[i * 2] != expected_b1 \ + || b[i * 2 + 1] != expected_b2) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c new file mode 100644 index 00000000000..af06270b6f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE1, TYPE2) \ +void __attribute__ ((weak)) \ +vec_slp_##TYPE1##_##TYPE2 (TYPE1 *restrict a, \ + TYPE2 *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += 1; \ + a[i * 2 + 1] += 2; \ + b[i * 2] += 3; \ + b[i * 2 + 1] += 4; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t, uint16_t) \ + T (uint8_t, int16_t) \ + T (int16_t, uint32_t) \ + T (uint16_t, int32_t) \ + T (int32_t, double) \ + T (uint32_t, int64_t) \ + T (float, uint64_t) + +TEST_ALL (VEC_PERM) + +/* The loop should be fully-masked. The load XFAILs for fixed-length + SVE account for extra loads from the constant pool. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail { aarch64_sve && { ! vect_variable_length } } } } }*/ +/* { dg-final { scan-assembler-times {\tst1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 6 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 7 { xfail { aarch64_sve && { ! vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 6 { xfail { aarch64_sve && { ! 
vect_variable_length } } } } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 6 } } */ +/* { dg-final { scan-assembler-not {\tldr} } } */ +/* { dg-final { scan-assembler-not {\tstr} } } */ + +/* We should use WHILEs for the accesses to "a" and unpacks for the accesses + to "b". */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +/* { dg-final { scan-assembler-not {\twhilelo\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tpunpklo\tp[0-7]\.h} 7 } } */ +/* { dg-final { scan-assembler-times {\tpunpkhi\tp[0-7]\.h} 7 } } */ + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c new file mode 100644 index 00000000000..0bde3b6ea03 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_9_run.c @@ -0,0 +1,39 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_9.c" + +#define N1 (103 * 2) +#define N2 (111 * 2) + +#define HARNESS(TYPE1, TYPE2) \ + { \ + TYPE1 a[N2]; \ + TYPE2 b[N2]; \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + b[i] = i * 3 + i % 7; \ + } \ + vec_slp_##TYPE1##_##TYPE2 (a, b, N1 / 2); \ + for (unsigned int i = 0; i < N2; ++i) \ + { \ + TYPE1 orig_a = i * 2 + i % 5; \ + TYPE2 orig_b = i * 3 + i % 7; \ + TYPE1 expected_a = orig_a; \ + TYPE2 expected_b = orig_b; \ + if (i < N1) \ + { \ + expected_a += i & 1 ? 2 : 1; \ + expected_b += i & 1 ? 
4 : 3; \ + } \ + if (a[i] != expected_a || b[i] != expected_b) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_1.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_1.c new file mode 100644 index 00000000000..ba2f569fd5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_1.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with no data references. */ + +#define SPEC_LOOP(ARGTYPE,INDUCTYPE)\ +INDUCTYPE spec_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit)\ +{\ + INDUCTYPE i = 0;\ + while ((i & mask) != limit)\ + i += 1;\ + return i;\ +}\ + +#define SPEC_FP_LOOP(ARGTYPE,INDUCTYPE,FPTYPE)\ +FPTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE##FPTYPE (ARGTYPE mask, ARGTYPE limit)\ +{\ + INDUCTYPE i = 0;\ + FPTYPE f = 0.0;\ + while ((i & mask) != limit)\ + {\ + f += 1;\ + i += 1;\ + }\ + return f;\ +}\ + +SPEC_LOOP (uint8_t, uint8_t) +SPEC_LOOP (uint16_t, uint16_t) +SPEC_LOOP (uint32_t, uint32_t) +SPEC_LOOP (uint64_t, uint64_t) + +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) + +/* Conversions. 
*/ +SPEC_LOOP (uint16_t, uint8_t) + +SPEC_LOOP (uint32_t, uint8_t) +SPEC_LOOP (uint32_t, uint16_t) + +SPEC_LOOP (uint64_t, uint8_t) +SPEC_LOOP (uint64_t, uint16_t) +SPEC_LOOP (uint64_t, uint32_t) + +SPEC_FP_LOOP (uint32_t, uint32_t, float) +SPEC_FP_LOOP (uint64_t, uint64_t, double) + +SPEC_FP_LOOP (uint64_t, uint64_t, float) + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 17 "vect" } } */ +/* { dg-final { scan-assembler-times {\tbrka\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b} 17 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b} 5 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_10.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_10.c new file mode 100644 index 00000000000..c69164bb1ea --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_10.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two loads from global buffers which can be aligned, + but does require peeling. 
*/ + +int a[500]; +int b[500]; + +int +foo (int n) +{ + int i = 0; + do + i += 1; + while (a[i] + b[i] < n); + return i; +} + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 4 bytes of ref" "vect" } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_11.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_11.c new file mode 100644 index 00000000000..92e4adc5571 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_11.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with two loads from global buffers which can be aligned + without any peeling. */ + +#define MAX_ARRAY_SIZE 500 + +#ifndef STRIDE_LEVEL +#define STRIDE_LEVEL 1 +#endif + +#define SPEC_LOOP(DATATYPE, ARGTYPE)\ +DATATYPE a##DATATYPE[MAX_ARRAY_SIZE];\ +DATATYPE b##DATATYPE[MAX_ARRAY_SIZE];\ +ARGTYPE spec_loop_##DATATYPE##_##ARGTYPE (DATATYPE n)\ +{\ + ARGTYPE i = -1;\ + do\ + i += 1;\ + while (a##DATATYPE[i*STRIDE_LEVEL] + b##DATATYPE[i*STRIDE_LEVEL] < n);\ + return i;\ +} + +/* TODO: Cannot yet vectorize due to gather load. 
*/ +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) + +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) +SPEC_LOOP (float, int32_t) +SPEC_LOOP (double, int64_t) + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + +/* { dg-final { scan-tree-dump "force alignment of aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bfloat" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of adouble" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bdouble" "vect" } } */ + +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint64_t" "vect" } } */ +/* 
{ dg-final { scan-tree-dump "misalign = 0 bytes of ref bint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bfloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref adouble" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bdouble" "vect" } } */ + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_11_run.c new file mode 100644 index 00000000000..ebcefdb623c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_11_run.c @@ -0,0 +1,61 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_11.c" + +extern void abort (void); + +#ifndef FILL_DATA +#define FILL_DATA 0 +#endif + +#ifndef EXIT_CONDITION +#define EXIT_CONDITION 5 +#endif + +#ifndef LOOP_COUNTS +#define LOOP_COUNTS {37,45,55,17,39,43} +#endif +int loop_counts[] = LOOP_COUNTS; + +/* Fill the arrays with the exit conditions. + Then refill at the correct strided accesses with fill data up to the end of + the loop count. 
*/ + +#define TEST_SPEC_LOOP_FUNC(DATATYPE, ARGTYPE)\ +void test_spec_loop_##DATATYPE##_##ARGTYPE (ARGTYPE num_elements)\ +{\ + int i;\ + for (i=0; i<MAX_ARRAY_SIZE; i++)\ + {\ + a##DATATYPE[i] = EXIT_CONDITION;\ + b##DATATYPE[i] = EXIT_CONDITION;\ + }\ + for (i=0; (i<num_elements-1)*STRIDE_LEVEL; i++)\ + {\ + a##DATATYPE[i*STRIDE_LEVEL] = FILL_DATA;\ + b##DATATYPE[i*STRIDE_LEVEL] = FILL_DATA;\ + }\ + ARGTYPE ret = spec_loop_##DATATYPE##_##ARGTYPE (EXIT_CONDITION);\ + if (ret != num_elements - 1)\ + abort ();\ +} + +TEST_SPEC_LOOP_FUNC (int8_t, int8_t) +TEST_SPEC_LOOP_FUNC (int16_t, int16_t) +TEST_SPEC_LOOP_FUNC (int32_t, int32_t) +TEST_SPEC_LOOP_FUNC (int64_t, int64_t) +TEST_SPEC_LOOP_FUNC (float, int32_t) +TEST_SPEC_LOOP_FUNC (double, int64_t) + +int main (void) +{ + test_spec_loop_int8_t_int8_t (loop_counts[0]); + test_spec_loop_int16_t_int16_t (loop_counts[1]); + test_spec_loop_int32_t_int32_t (loop_counts[2]); + test_spec_loop_int64_t_int64_t (loop_counts[3]); + test_spec_loop_float_int32_t (loop_counts[4]); + test_spec_loop_double_int64_t (loop_counts[5]); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_12.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_12.c new file mode 100644 index 00000000000..d6caa8e7513 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_12.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two loads from global buffers which can be aligned + without any peeling, and an access stride of 2. 
*/ + +#define STRIDE_LEVEL 2 + +#include "sve_speculative_11.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + +/* { dg-final { scan-tree-dump "force alignment of aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint64_t" "vect" } } */ + +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint64_t" "vect" } } */ + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_12_run.c new file mode 100644 index 00000000000..42c346073c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_12_run.c 
@@ -0,0 +1,9 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#define STRIDE_LEVEL 2 +#define EXIT_CONDITION 7 +#define LOOP_COUNTS {43,27,19,54,25,27} + +#include "sve_speculative_11_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_13.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_13.c new file mode 100644 index 00000000000..ecd7e258161 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_13.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two loads from global buffers which can be aligned + without any peeling, and an access stride of 3. */ + +#define STRIDE_LEVEL 3 + +#include "sve_speculative_11.c" + +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 20 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Not allowing first faulting" 10 "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_13_run.c new file mode 100644 index 00000000000..519ff21e168 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_13_run.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#define STRIDE_LEVEL 3 +#define EXIT_CONDITION 9 +#define LOOP_COUNTS {19,47,15,35,23,33} + +#include "sve_speculative_11_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_14.c 
b/gcc/testsuite/gcc.target/aarch64/sve_speculative_14.c new file mode 100644 index 00000000000..218afb6c5ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_14.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two loads from global buffers which can be aligned + without any peeling, and an access stride of 4. */ + +#define STRIDE_LEVEL 4 + +#include "sve_speculative_11.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + +/* { dg-final { scan-tree-dump "force alignment of aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of bint64_t" "vect" } } */ + +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint32_t" "vect" } } */ +/* { dg-final { 
scan-tree-dump "misalign = 0 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref bint64_t" "vect" } } */ + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_14_run.c new file mode 100644 index 00000000000..958e94fd822 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_14_run.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#define STRIDE_LEVEL 4 + +#define FILL_DATA 5 +#define EXIT_CONDITION 22 +#define LOOP_COUNTS {43,27,19,54,25,27} + +#include "sve_speculative_11_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_15.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_15.c new file mode 100644 index 00000000000..42ec564c90b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_15.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with two consecutive loads from a single global buffer + which can be aligned without any peeling, and an access stride of 2. */ + +#define MAX_ARRAY_SIZE 500 + +/* Minimum STRIDE_LEVEL is 2. */ +#ifndef STRIDE_LEVEL +#define STRIDE_LEVEL 2 +#endif + +#define SPEC_LOOP(DATATYPE, ARGTYPE)\ +DATATYPE a##DATATYPE[MAX_ARRAY_SIZE];\ +ARGTYPE spec_loop_##DATATYPE##_##ARGTYPE (DATATYPE n)\ +{\ + ARGTYPE i = -1;\ + do\ + i += 1;\ + while (a##DATATYPE[i*STRIDE_LEVEL] + a##DATATYPE[(i*STRIDE_LEVEL) + 1] < n);\ + return i;\ +} + +/* TODO: Cannot yet vectorize due to gather load. 
*/ +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) + +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) +SPEC_LOOP (float, int32_t) +SPEC_LOOP (double, int64_t) + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + +/* { dg-final { scan-tree-dump "force alignment of aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of adouble" "vect" } } */ + +/* { dg-final { scan-tree-dump "misalign = 1 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 2 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 4 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 8 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 4 bytes of ref afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref adouble" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 8 bytes of ref adouble" "vect" } } */ + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_15_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_15_run.c new file mode 100644 index 00000000000..533f99467fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_15_run.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_15.c" + +extern void abort (void); + +#ifndef FILL_DATA +#define FILL_DATA 0 +#endif + +#ifndef EXIT_CONDITION +#define EXIT_CONDITION 5 +#endif + +#ifndef LOOP_COUNTS +#define LOOP_COUNTS {37,45,55,17,39,43} +#endif +int loop_counts[] = LOOP_COUNTS; + +/* Fill the arrays with the exit conditions. + Then refill at the correct strided accesses with fill data up to the end of + the loop count. */ + +#define TEST_SPEC_LOOP_FUNC(DATATYPE, ARGTYPE) \ +void \ +test_spec_loop_##DATATYPE##_##ARGTYPE (ARGTYPE num_elements) \ +{ \ + for (int i = 0; i < MAX_ARRAY_SIZE; ++i) \ + a##DATATYPE[i] = EXIT_CONDITION; \ + for (int i = 0; i < (num_elements - 1) * STRIDE_LEVEL; ++i) \ + a##DATATYPE[i] = FILL_DATA; \ + ARGTYPE ret = spec_loop_##DATATYPE##_##ARGTYPE (EXIT_CONDITION); \ + if (ret != num_elements - 1) \ + abort (); \ +} + +TEST_SPEC_LOOP_FUNC (int8_t, int8_t) +TEST_SPEC_LOOP_FUNC (int16_t, int16_t) +TEST_SPEC_LOOP_FUNC (int32_t, int32_t) +TEST_SPEC_LOOP_FUNC (int64_t, int64_t) +TEST_SPEC_LOOP_FUNC (float, int32_t) +TEST_SPEC_LOOP_FUNC (double, int64_t) + +int main (void) +{ + test_spec_loop_int8_t_int8_t (loop_counts[0]); + test_spec_loop_int16_t_int16_t (loop_counts[1]); + test_spec_loop_int32_t_int32_t (loop_counts[2]); + test_spec_loop_int64_t_int64_t (loop_counts[3]); + test_spec_loop_float_int32_t (loop_counts[4]); + test_spec_loop_double_int64_t (loop_counts[5]); + return 0; +} + diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_speculative_16.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_16.c new file mode 100644 index 00000000000..f192c7f374d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_16.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two consecutive loads from a single global buffer + which can be aligned without any peeling, and an access stride of 3. */ + +#define STRIDE_LEVEL 3 + +#include "sve_speculative_15.c" + +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 20 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Not allowing first faulting" 10 "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_16_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_16_run.c new file mode 100644 index 00000000000..7c53e7aeed6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_16_run.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#define STRIDE_LEVEL 3 +#define EXIT_CONDITION 7 +#define LOOP_COUNTS {43,27,19,54,25,27} + +#include "sve_speculative_15_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_17.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_17.c new file mode 100644 index 00000000000..b7e472e0deb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_17.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +/* Speculative loop with two consecutive loads from a single 
global buffer + which can be aligned without any peeling, and an access stride of 4. */ + +#define STRIDE_LEVEL 4 + +#include "sve_speculative_15.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + +/* { dg-final { scan-tree-dump "force alignment of aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "force alignment of aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of adouble" "vect" } } */ + +/* { dg-final { scan-tree-dump "misalign = 1 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint8_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 2 bytes of ref aint16_t" "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 4 bytes of ref aint32_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 8 bytes of ref aint64_t" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 4 bytes of ref afloat" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 0 bytes of ref adouble" "vect" } } */ +/* { dg-final { scan-tree-dump "misalign = 8 bytes of ref adouble" "vect" } } */ + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_speculative_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_17_run.c new file mode 100644 index 00000000000..5453116429a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_17_run.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#define STRIDE_LEVEL 4 +#define EXIT_CONDITION 9 +#define LOOP_COUNTS {19,47,15,35,23,33} + +#include "sve_speculative_15_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_18.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_18.c new file mode 100644 index 00000000000..8a11a50c749 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_18.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=scalable" } */ + +/* Speculative loop with two loads which cannot both be aligned. + Will use first faulting (instead of versioning) to cope with alignment + issues. 
*/ + +#define STRIDE_LEVEL 1 + +#include "sve_speculative_9.c" + +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 8 "vect" } } */ +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_18_run.c new file mode 100644 index 00000000000..6826087bf3d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_18_run.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#define STRIDE_LEVEL 1 +#define EXIT_CONDITION 97 +#define LOOP_COUNTS {63,73,7,73,28,37} + +#include "sve_speculative_9_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_19.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_19.c new file mode 100644 index 00000000000..90a1e231fb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_19.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=scalable" } */ + +/* Speculative loop with two strided loads which cannot both be aligned. + Will attempt first faulting and fail to use it. 
*/ + +#define STRIDE_LEVEL 2 + +#include "sve_speculative_9.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 8 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Not allowing first faulting: load step is invalid" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_19_run.c new file mode 100644 index 00000000000..539b45a2900 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_19_run.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#define STRIDE_LEVEL 2 +#define EXIT_CONDITION 17 +#define LOOP_COUNTS {21,7,9,85,24,31} + +#include "sve_speculative_9_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_1_run.c new file mode 100644 index 00000000000..f4bb55ed6f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_1_run.c @@ -0,0 +1,47 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_1.c" + +extern void abort (void); + +#define TEST_LOOP(ARGTYPE,INDUCTYPE)\ +{\ + INDUCTYPE res = spec_loop_##ARGTYPE##INDUCTYPE (0xFF, 0xAE);\ + if (res != 0xAE)\ + abort ();\ +}\ + +#define TEST_FP_LOOP(ARGTYPE,INDUCTYPE,FPTYPE)\ +{\ + FPTYPE res = spec_fp_loop_##ARGTYPE##INDUCTYPE##FPTYPE (0xFF, 0xAE);\ + if (res != 0xAE)\ + abort ();\ +}\ + +int main () +{ + TEST_LOOP 
(uint8_t, uint8_t); + TEST_LOOP (uint16_t, uint16_t); + TEST_LOOP (uint32_t, uint32_t); + TEST_LOOP (uint64_t, uint64_t); + TEST_LOOP (int32_t, int32_t); + TEST_LOOP (int64_t, int64_t); + + TEST_LOOP (uint16_t, uint8_t) + + TEST_LOOP (uint32_t, uint8_t) + TEST_LOOP (uint32_t, uint16_t) + + TEST_LOOP (uint64_t, uint8_t) + TEST_LOOP (uint64_t, uint16_t) + TEST_LOOP (uint64_t, uint32_t) + + TEST_FP_LOOP (uint32_t, uint32_t, float) + TEST_FP_LOOP (uint64_t, uint64_t, double) + + TEST_FP_LOOP (uint64_t, uint64_t, float) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_2.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_2.c new file mode 100644 index 00000000000..108c5a6fbe6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_2.c @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> +#include <stdbool.h> + +/* Speculative loop with no data references. */ + +/* FIXME: dup of rhs into predicate register is made of horrible code. */ +#define SPEC_LOOP(ARGTYPE,INDUCTYPE)\ +INDUCTYPE spec_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit,\ + bool rhs)\ +{\ + INDUCTYPE i = 0;\ + bool lhs = (i & mask) != limit;\ + while (lhs == rhs)\ + {\ + i += 1;\ + lhs = (i & mask) != limit;\ + }\ + return i;\ +}\ + +#define SPEC_FP_LOOP(ARGTYPE,INDUCTYPE,FPTYPE)\ +INDUCTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE##FPTYPE (ARGTYPE mask, ARGTYPE limit,\ + bool rhs)\ +{\ + INDUCTYPE i = 0;\ + FPTYPE f = 0.0;\ + bool lhs = (i & mask) != limit;\ + while (lhs == rhs)\ + {\ + f += 1;\ + i += 1;\ + lhs = (i & mask) != limit;\ + }\ + return f;\ +}\ + +SPEC_LOOP (uint8_t, uint8_t) +SPEC_LOOP (uint16_t, uint16_t) +SPEC_LOOP (uint32_t, uint32_t) +SPEC_LOOP (uint64_t, uint64_t) + +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) + +/* Conversions. 
*/ +SPEC_LOOP (uint16_t, uint8_t) + +SPEC_LOOP (uint32_t, uint8_t) +SPEC_LOOP (uint32_t, uint16_t) + +SPEC_LOOP (uint64_t, uint8_t) +SPEC_LOOP (uint64_t, uint16_t) +SPEC_LOOP (uint64_t, uint32_t) + +SPEC_FP_LOOP (uint32_t, uint32_t, float) +SPEC_FP_LOOP (uint64_t, uint64_t, double) + +SPEC_FP_LOOP (uint64_t, uint64_t, float) + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 17 "vect" } } */ +/* { dg-final { scan-assembler-times {\tbrka\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b} 17 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b} 5 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_20.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_20.c new file mode 100644 index 00000000000..55802fb4383 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_20.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details" } */ + +/* Speculative loop with two strided loads which cannot both be aligned. + Will attempt first faulting and fail to use it. 
*/ + +#define STRIDE_LEVEL 3 + +#include "sve_speculative_9.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 20 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Not allowing first faulting: load step is invalid" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_20_run.c new file mode 100644 index 00000000000..43337252063 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_20_run.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#define STRIDE_LEVEL 3 +#define EXIT_CONDITION 93 +#define LOOP_COUNTS {93,7,82,39,76,55} + +#include "sve_speculative_9_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_21.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_21.c new file mode 100644 index 00000000000..f3673751276 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_21.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=scalable" } */ + +/* Speculative loop with two strided loads which cannot both be aligned. + Will attempt first faulting and fail to use it. 
*/ + +#define STRIDE_LEVEL 4 + +#include "sve_speculative_9.c" + +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 8 "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_21_run.c new file mode 100644 index 00000000000..c48547d0eb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_21_run.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#define STRIDE_LEVEL 4 +#define EXIT_CONDITION 72 +#define LOOP_COUNTS {21,34,18,55,33,55} + +#include "sve_speculative_9_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_22.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_22.c new file mode 100644 index 00000000000..1369ba4428a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_22.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +/* Speculative loop with two loads which cannot both be aligned. + Exit condition of the while loop uses the loop induction value. + Will use first faulting (instead of versioning) to cope with alignment + issues. */ + +#ifndef STRIDE_LEVEL +#define STRIDE_LEVEL 1 +#endif + +#define SPEC_LOOP(DATATYPE, ARGTYPE)\ +ARGTYPE spec_loop_##DATATYPE##_##ARGTYPE (DATATYPE *a, DATATYPE *b)\ +{\ + ARGTYPE i = -1;\ + do\ + i += 1;\ + while (a[i*STRIDE_LEVEL] + b[i*STRIDE_LEVEL] != i);\ + return i;\ +} + +/* TODO: Cannot yet vectorize due to gather load. 
*/ +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) + +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) +SPEC_LOOP (float, int32_t) +SPEC_LOOP (double, int64_t) + +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 8 "vect" } } */ +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_22_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_22_run.c new file mode 100644 index 00000000000..17a6ee0d72a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_22_run.c @@ -0,0 +1,54 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#include "sve_speculative_22.c" + +extern void abort (void); + +#ifndef MAX_ARRAY_SIZE +#define MAX_ARRAY_SIZE 500 +#endif + +#ifndef FILL_DATA_A +#define FILL_DATA_A 55 +#endif + +#ifndef FILL_DATA_B +#define FILL_DATA_B 7 +#endif + +/* Fill the arrays with the exit conditions. 
*/ + +#define TEST_SPEC_LOOP_FUNC(DATATYPE, ARGTYPE)\ +void test_spec_loop_##DATATYPE##_##ARGTYPE ()\ +{\ + DATATYPE a[MAX_ARRAY_SIZE];\ + DATATYPE b[MAX_ARRAY_SIZE];\ + int i;\ + for (i=0; i<MAX_ARRAY_SIZE; i++)\ + {\ + a[i] = FILL_DATA_A;\ + b[i] = FILL_DATA_B;\ + }\ + ARGTYPE ret = spec_loop_##DATATYPE##_##ARGTYPE (a, b);\ + if (ret != FILL_DATA_A + FILL_DATA_B)\ + abort ();\ +} + +TEST_SPEC_LOOP_FUNC (int8_t, int8_t) +TEST_SPEC_LOOP_FUNC (int16_t, int16_t) +TEST_SPEC_LOOP_FUNC (int32_t, int32_t) +TEST_SPEC_LOOP_FUNC (int64_t, int64_t) +TEST_SPEC_LOOP_FUNC (float, int32_t) +TEST_SPEC_LOOP_FUNC (double, int64_t) + +int main (void) +{ + test_spec_loop_int8_t_int8_t (); + test_spec_loop_int16_t_int16_t (); + test_spec_loop_int32_t_int32_t (); + test_spec_loop_int64_t_int64_t (); + test_spec_loop_float_int32_t (); + test_spec_loop_double_int64_t (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_23.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_23.c new file mode 100644 index 00000000000..28edbf54df2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_23.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +/* Speculative loop with two loads from the loop phi, which cannot both be aligned. 
*/ + +#define SPEC_LOOP(DATATYPE, ARGTYPE)\ +ARGTYPE spec_loop_##DATATYPE##_##ARGTYPE (DATATYPE *a, DATATYPE*b, DATATYPE n)\ +{\ + DATATYPE x;\ + while ((x=*a++ + *b++) != n);\ + return x;\ +} + +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) +SPEC_LOOP (float, int32_t) +SPEC_LOOP (double, int64_t) + + +/* { dg-final { scan-tree-dump-times "unknown alignment, setting loop as first faulting" 12 "vect" } } */ +/* { dg-final { scan-tree-dump-not "loop versioned for vectorization to enhance alignment" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_23_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_23_run.c new file mode 100644 index 00000000000..e0230bd5712 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_23_run.c @@ -0,0 +1,66 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ + +#include "sve_speculative_23.c" + +extern void abort (void); + +#ifndef MAX_ARRAY_SIZE +#define MAX_ARRAY_SIZE 500 +#endif + +#ifndef FILL_DATA +#define FILL_DATA 0 +#endif + +#ifndef EXIT_CONDITION +#define EXIT_CONDITION 8 +#endif + +#ifndef LOOP_COUNTS +#define LOOP_COUNTS {57,81,93,117,47,77} +#endif +int loop_counts[] = LOOP_COUNTS; + +/* Fill the arrays with the exit conditions. + Then refill at the correct strided accesses with fill data up to the end of + the loop count. 
*/ + +#define TEST_SPEC_LOOP_FUNC(DATATYPE, ARGTYPE)\ +void test_spec_loop_##DATATYPE##_##ARGTYPE (ARGTYPE num_elements)\ +{\ + DATATYPE a[MAX_ARRAY_SIZE];\ + DATATYPE b[MAX_ARRAY_SIZE];\ + int i;\ + for (i=0; i<MAX_ARRAY_SIZE; i++)\ + {\ + a[i] = EXIT_CONDITION;\ + b[i] = EXIT_CONDITION;\ + }\ + for (i=0; (i<num_elements-1); i++)\ + {\ + a[i] = FILL_DATA;\ + b[i] = FILL_DATA;\ + }\ + ARGTYPE ret = spec_loop_##DATATYPE##_##ARGTYPE (a, b, EXIT_CONDITION * 2);\ + if (ret != EXIT_CONDITION * 2)\ + abort ();\ +} + +TEST_SPEC_LOOP_FUNC (int8_t, int8_t) +TEST_SPEC_LOOP_FUNC (int16_t, int16_t) +TEST_SPEC_LOOP_FUNC (int32_t, int32_t) +TEST_SPEC_LOOP_FUNC (int64_t, int64_t) +TEST_SPEC_LOOP_FUNC (float, int32_t) +TEST_SPEC_LOOP_FUNC (double, int64_t) + +int main (void) +{ + test_spec_loop_int8_t_int8_t (loop_counts[0]); + test_spec_loop_int16_t_int16_t (loop_counts[1]); + test_spec_loop_int32_t_int32_t (loop_counts[2]); + test_spec_loop_int64_t_int64_t (loop_counts[3]); + test_spec_loop_float_int32_t (loop_counts[4]); + test_spec_loop_double_int64_t (loop_counts[5]); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_24.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_24.c new file mode 100644 index 00000000000..3d8bc214007 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_24.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +int __attribute__ ((noinline, noclone)) +foo (int *mask) +{ + int res = 0; + for (int i = 0; mask[i] != 0xf8; ++i) + res += i; + return res; +} + +/* { dg-final { scan-assembler {\twrffr\t.*\tldff1w\t.*\trdffr\t} } } */ +/* { dg-final { scan-assembler {\trdffr\t.*\tbrka\t.*\tadd\tz[0-9]\.s, p[0-7]/m,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_24_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_24_run.c new file mode 100644 index 00000000000..982ef14804d --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_speculative_24_run.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_speculative_24.c" + +int +main (void) +{ + int mask[] = { 1, 5, 9, 11, 15, 19, 100, 101, 102, 0xf8 }; + if (foo (mask) != 36) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_25.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_25.c new file mode 100644 index 00000000000..8f206838f1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_25.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include "sve_speculative_24.c" + +/* { dg-final { scan-assembler {\tld1w\t.*\tbrka\t.*\tadd\tz[0-9]\.s, p[0-7]/m,} } } */ +/* { dg-final { scan-assembler {\twhilelo\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_25_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_25_run.c new file mode 100644 index 00000000000..e501d41787a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_25_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_24_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_26.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_26.c new file mode 100644 index 00000000000..36e27226d7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_26.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256" } */ + +int mask[] = { 1, 5, 9, 11, 15, 19, 100, 101, 102, 0xf8 }; + +int __attribute__ ((noinline, noclone)) +foo (void) +{ + int res = 0; + int i = -1; + do + { + i += 1; + res += i; + } + while (mask[i] != 0xf8); + return res; +} + + 
+/* { dg-final { scan-assembler {\tld1w\t.*\tbrka\t.*\tadd\tz[0-9]\.s, p[0-7]/m,} } } */ +/* { dg-final { scan-assembler-not {\twhilelo\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_26_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_26_run.c new file mode 100644 index 00000000000..52a8724f9f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_26_run.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_26.c" + +int +main (void) +{ + if (foo () != 45) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_27.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_27.c new file mode 100644 index 00000000000..1b74d5c86cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_27.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +int __attribute__ ((noinline, noclone)) +foo (short *ptr1, short *ptr2) +{ + int res = 0; + for (int i = 0; ptr1[i]; ++i) + res += ptr2[i] * i; + return res; +} + +/* { dg-final { scan-assembler {\twrffr\t.*\tldff1h\t.*\tldff1h\t.*\trdffr\t} } } */ +/* { dg-final { scan-assembler {\tincw\tz[0-9]} } } */ +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_27_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_27_run.c new file mode 100644 index 00000000000..c4320d3b6d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_27_run.c @@ -0,0 +1,29 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_speculative_27.c" + +#define N 60 +#define COUNT 37 + +short array1[N]; +short array2[N]; + +int +main (void) +{ + int expected = 0; + 
for (unsigned int i = 0; i < N; ++i) + { + array1[i] = 2 * i + (i & 1) + 1; + array2[i] = i * ((i & 3) + 1); + if (i < COUNT) + expected += array2[i] * i; + asm volatile (""); + } + array1[COUNT] = 0; + int res = foo (array1, array2); + if (res != expected) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_28.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_28.c new file mode 100644 index 00000000000..7fd596978f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_28.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +int __attribute__ ((weak)) +f (int x) +{ + for (int i = 0;; i++) + if (x & (1 << i)) + return i; +} + +/* { dg-final { scan-assembler {\tcmpne\tp[0-9]+\.s,} } } */ +/* { dg-final { scan-assembler {\tbrka\t} } } */ +/* { dg-final { scan-assembler {\tlastb\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_28_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_28_run.c new file mode 100644 index 00000000000..255ed93afbf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_28_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_speculative_28.c" + +int +main (void) +{ + if (f (4) != 2) + __builtin_abort (); + if (f (32) != 5) + __builtin_abort (); + if (f (128) != 7) + __builtin_abort (); + if (f (16384) != 14) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_29.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_29.c new file mode 100644 index 00000000000..4f1fd54bc70 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_29.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +float __attribute__ ((weak)) +f (int x, float y) +{ + float f = 0.0f; + for (int i = 0;; i++, f += y) + if (x & (1 << i)) + return f; +} + 
+/* { dg-final { scan-assembler {\tcmpne\tp[0-9]+\.s,} } } */ +/* { dg-final { scan-assembler {\tbrka\t} } } */ +/* { dg-final { scan-assembler {\tfadda\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_29_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_29_run.c new file mode 100644 index 00000000000..6709f0a7985 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_29_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -march=armv8-a+sve" } */ + +#include "sve_speculative_29.c" + +int +main (void) +{ + if (f (4, 2.0f) != 4.0f) + __builtin_abort (); + if (f (32, 4.0f) != 20.0f) + __builtin_abort (); + if (f (128, 100.f) != 700.0f) + __builtin_abort (); + if (f (16384, 3.0f) != 42.0f) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_2_run.c new file mode 100644 index 00000000000..ad2c9c874b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_2_run.c @@ -0,0 +1,45 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_2.c" + +extern void abort (void); + +#define TEST_LOOP(ARGTYPE,INDUCTYPE)\ +{\ + INDUCTYPE res = spec_loop_##ARGTYPE##INDUCTYPE (0xFF, 0xAE, true);\ + if (res != 0xAE)\ + abort ();\ +}\ + +#define TEST_FP_LOOP(ARGTYPE,INDUCTYPE,FPTYPE)\ +{\ + FPTYPE res = spec_fp_loop_##ARGTYPE##INDUCTYPE##FPTYPE (0xFF, 0xAE, true);\ + if (res != 0xAE)\ + abort ();\ +}\ + +int main () +{ + TEST_LOOP (uint8_t, uint8_t); + TEST_LOOP (uint16_t, uint16_t); + TEST_LOOP (uint32_t, uint32_t); + TEST_LOOP (uint64_t, uint64_t); + TEST_LOOP (int32_t, int32_t); + TEST_LOOP (int64_t, int64_t); + + TEST_LOOP (uint16_t, uint8_t) + + TEST_LOOP (uint32_t, 
uint8_t) + TEST_LOOP (uint32_t, uint16_t) + + TEST_LOOP (uint64_t, uint8_t) + TEST_LOOP (uint64_t, uint16_t) + TEST_LOOP (uint64_t, uint32_t) + + TEST_FP_LOOP (uint32_t, uint32_t, float) + TEST_FP_LOOP (uint64_t, uint64_t, double) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c new file mode 100644 index 00000000000..25f3047444e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_3.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with different sizes and no data references . + Cannot be vectorized. */ + +#define SPEC_FP_LOOP(ARGTYPE,INDUCTYPE,FPTYPE)\ +FPTYPE spec_fp_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit)\ +{\ + INDUCTYPE i = 0;\ + FPTYPE f = 0.0;\ + while ((i & mask) != limit)\ + {\ + f += 1;\ + i += 1;\ + }\ + return f;\ +}\ + +SPEC_FP_LOOP (uint32_t, uint32_t, double) + +/* { dg-final { scan-tree-dump-times "Not vectorized: Multiple ncopies not supported" 1 "vect" } } */ +/* { dg-final { scan-assembler-not "brka\tp\[0-9\]*.b, p\[0-9\]*\/z, p\[0-9\]*.b" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_4.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_4.c new file mode 100644 index 00000000000..32b8c71c92a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_4.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with a load. 
*/ + +#define SPEC_LOOP(ARGTYPE,INDUCTYPE)\ +INDUCTYPE spec_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit, ARGTYPE * array)\ +{\ + uint64_t i = 0;\ + INDUCTYPE r = 0;\ + while ((i & mask) != limit)\ + {\ + r = array[i];\ + i++;\ + }\ + return r;\ +} + +#define SPEC_FP_LOOP(ARGTYPE,FPTYPE)\ +FPTYPE spec_fp_loop_##ARGTYPE##FPTYPE (ARGTYPE mask, ARGTYPE limit, FPTYPE * array)\ +{\ + uint64_t i = 0;\ + FPTYPE f = 0.0;\ + while ((i & mask) != limit)\ + {\ + f = array[i];\ + i++;\ + }\ + return f;\ +} + +SPEC_LOOP (uint8_t, uint8_t) +SPEC_LOOP (uint16_t, uint16_t) +SPEC_LOOP (uint32_t, uint32_t) +SPEC_LOOP (uint64_t, uint64_t) + +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) + +/* Conversions. */ +SPEC_LOOP (uint16_t, uint8_t) + +SPEC_LOOP (uint32_t, uint8_t) +SPEC_LOOP (uint32_t, uint16_t) + +SPEC_LOOP (uint64_t, uint8_t) +SPEC_LOOP (uint64_t, uint16_t) +SPEC_LOOP (uint64_t, uint32_t) + +SPEC_FP_LOOP (uint32_t, float) +SPEC_FP_LOOP (uint64_t, double) + +SPEC_FP_LOOP (uint64_t, float) + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 17 "vect" } } */ +/* { dg-final { scan-assembler-times {\tbrka\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b} 17 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h} 3 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d} 5 } } */ +/* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_4_run.c new file mode 100644 index 00000000000..96834ba51be --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_speculative_4_run.c @@ -0,0 +1,56 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -ffast-math -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_4.c" + +extern void abort (void); +#include <string.h> + +#define MAX 0xAE + +#define TEST_LOOP(ARGTYPE,INDUCTYPE)\ +{\ + ARGTYPE array[MAX];\ + memset (array, 0, sizeof (ARGTYPE) * MAX);\ + array[MAX - 1] = 72;\ + INDUCTYPE res = spec_loop_##ARGTYPE##INDUCTYPE (0xFF, MAX, array);\ + if (res != 72)\ + abort ();\ +} + +#define TEST_FP_LOOP(ARGTYPE,FPTYPE)\ +{\ + FPTYPE array[MAX];\ + memset (array, 0, sizeof (FPTYPE) * MAX);\ + array[MAX - 1] = 54.5;\ + FPTYPE res = spec_fp_loop_##ARGTYPE##FPTYPE (0xFF, MAX, array);\ + if (res != 54.5)\ + abort ();\ +} + +int main () +{ + TEST_LOOP (uint8_t, uint8_t); + TEST_LOOP (uint16_t, uint16_t); + TEST_LOOP (uint32_t, uint32_t); + TEST_LOOP (uint64_t, uint64_t); + TEST_LOOP (int32_t, int32_t); + TEST_LOOP (int64_t, int64_t); + + TEST_LOOP (uint16_t, uint8_t) + + TEST_LOOP (uint32_t, uint8_t) + TEST_LOOP (uint32_t, uint16_t) + + TEST_LOOP (uint64_t, uint8_t) + TEST_LOOP (uint64_t, uint16_t) + TEST_LOOP (uint64_t, uint32_t) + + TEST_FP_LOOP (uint32_t, float) + TEST_FP_LOOP (uint64_t, double) + + TEST_FP_LOOP (uint64_t, float) + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_5.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5.c new file mode 100644 index 00000000000..d1d8f8fbaaa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with a load. Exit condition in the array. 
*/ + +#ifndef EXIT_CONDITION +#define EXIT_CONDITION 1 +#endif + +#define SPEC_LOOP(ARGTYPE)\ +ARGTYPE spec_loop_##ARGTYPE (ARGTYPE * array)\ +{\ + ARGTYPE i = 0;\ + ARGTYPE r = EXIT_CONDITION + 1;\ + while (r != EXIT_CONDITION)\ + {\ + r = array[i];\ + i++;\ + }\ + return i;\ +} + +#define SPEC_FP_LOOP(FPTYPE, ARGTYPE)\ +ARGTYPE spec_loop_##ARGTYPE##FPTYPE (FPTYPE * array)\ +{\ + ARGTYPE i = 0;\ + ARGTYPE r = EXIT_CONDITION + 1;\ + while (r != EXIT_CONDITION)\ + {\ + r = array[i];\ + i++;\ + }\ + return i;\ +} + +/* TODO: Cannot yet vectorize due to gather load. */ +SPEC_LOOP (int8_t) +SPEC_LOOP (int16_t) + +SPEC_LOOP (int32_t) +SPEC_LOOP (int64_t) +SPEC_FP_LOOP (float, int32_t) +SPEC_FP_LOOP (double, int64_t) + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ +/* { dg-final { scan-assembler-times {\tbrka\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b} 4 } } */ +/* { dg-final { scan-assembler-not {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b} } } */ +/* { dg-final { scan-assembler-not {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h} } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-not {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s} } } */ +/* { dg-final { scan-assembler-not {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run.c new file mode 100644 index 00000000000..a8f7f9fff17 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run.c @@ -0,0 +1,104 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_5.c" + +#define _GNU_SOURCE +#include <sys/mman.h> +extern void abort (void); +extern 
void *mremap (void *old_address, size_t old_size, + size_t new_size, int flags, ... /* void *new_address */); + +#ifndef FILL_DATA +#define FILL_DATA 0 +#endif + +#ifndef LOOP_COUNTS +#define LOOP_COUNTS {22,20,13,17,29,19} +#endif +int loop_counts[] = LOOP_COUNTS; + +/* Program will fault if memory beyond the boundaries of BUF is accessed. */ + +#define SPACE_SIZE 4096*sizeof(int) + +/* Enable to confirm program segfaults when accessing outside of BUF. */ +#ifdef CHECK_SEGFAULT +#define ADDITIONAL 1 +#else +#define ADDITIONAL 0 +#endif + +/* BUF is an array of NUM_ELEMENTS size. + BUF_PRE points to 4 elements before BUF. + Before calling SPEC_LOOP, set the last element of BUF and the + four elements of BUF_PRE to the exit condition. + Fill the rest of BUF to the fill data. */ + +#define TEST_SPEC_LOOP_FUNC(ARGTYPE)\ +void test_spec_loop_##ARGTYPE (void *bufend, ARGTYPE num_elements)\ +{\ + int i;\ + ARGTYPE* buf = ((ARGTYPE*)bufend) - num_elements;\ + ARGTYPE* buf_pre = ((ARGTYPE*)bufend) - num_elements - 4;\ + for (i=0; i<num_elements-1; i++)\ + buf[i] = FILL_DATA;\ + buf[num_elements - 1 + ADDITIONAL] = EXIT_CONDITION;\ + for (i=0; i<4; i++)\ + buf_pre[i] = EXIT_CONDITION;\ + ARGTYPE ret = spec_loop_##ARGTYPE (buf);\ + if (ret != num_elements)\ + abort ();\ +} + +#define TEST_SPEC_FP_LOOP_FUNC(FPTYPE, ARGTYPE)\ +void test_spec_loop_##ARGTYPE##FPTYPE (void *bufend, ARGTYPE num_elements)\ +{\ + int i;\ + FPTYPE* buf = ((FPTYPE*)bufend) - num_elements;\ + FPTYPE* buf_pre = ((FPTYPE*)bufend) - num_elements - 4;\ + for (i=0; i<num_elements-1; i++)\ + buf[i] = FILL_DATA;\ + buf[num_elements - 1 + ADDITIONAL] = EXIT_CONDITION;\ + for (i=0; i<4; i++)\ + buf_pre[i] = EXIT_CONDITION;\ + ARGTYPE ret = spec_loop_##ARGTYPE##FPTYPE (buf);\ + if (ret != num_elements)\ + abort ();\ +} + +TEST_SPEC_LOOP_FUNC (int8_t) +TEST_SPEC_LOOP_FUNC (int16_t) +TEST_SPEC_LOOP_FUNC (int32_t) +TEST_SPEC_LOOP_FUNC (int64_t) +TEST_SPEC_FP_LOOP_FUNC (float, int32_t) +TEST_SPEC_FP_LOOP_FUNC 
(double, int64_t) + +int main (void) +{ + /* Map in two pages worth of space. Then reduce it down to a single page. + This will result in the second page of data being unmapped - ie it + will cause a segfault if accessed. */ + + void *space = mmap (0, SPACE_SIZE * 2, PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0); + if (space == (void*)-1) + abort (); + + void *space_new = mremap (space, SPACE_SIZE * 2, SPACE_SIZE, 0); + if (space != space_new) + abort (); + + /* set END to the start of the second (unmapped) page. */ + char *end = space + SPACE_SIZE; + + test_spec_loop_int8_t (end, loop_counts[0]); + test_spec_loop_int16_t (end, loop_counts[1]); + test_spec_loop_int32_t (end, loop_counts[2]); + test_spec_loop_int64_t (end, loop_counts[3]); + test_spec_loop_int32_tfloat (end, loop_counts[4]); + test_spec_loop_int64_tdouble (end, loop_counts[5]); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_2.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_2.c new file mode 100644 index 00000000000..ed12336f47d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_2.c @@ -0,0 +1,8 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +/* Use exit condition of 0. 
*/ +#define EXIT_CONDITION 0 +#define FILL_DATA 1 +#include "sve_speculative_5_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_3.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_3.c new file mode 100644 index 00000000000..c6a5edf86b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_5_run_3.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +/* Use exit condition of 0 and less than a single iteration. */ +#define EXIT_CONDITION 0 +#define FILL_DATA 1 +#define LOOP_COUNTS {3,5,3,1,5,1} +#include "sve_speculative_5_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c new file mode 100644 index 00000000000..4765b22f014 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_6.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with a conditional load. */ + +#define SPEC_LOOP(ARGTYPE,INDUCTYPE)\ +INDUCTYPE spec_loop_##ARGTYPE##INDUCTYPE (ARGTYPE mask, ARGTYPE limit,\ + ARGTYPE * array, ARGTYPE * cond)\ +{\ + uint64_t i = 0;\ + INDUCTYPE r = 0;\ + while ((i & mask) != limit)\ + {\ + if (cond[i])\ + r = array[i];\ + i++;\ + }\ + return r;\ +} + +SPEC_LOOP (uint8_t, uint8_t) +SPEC_LOOP (uint16_t, uint16_t) +SPEC_LOOP (uint32_t, uint32_t) +SPEC_LOOP (uint64_t, uint64_t) + +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) + +/* Conversions. 
*/ +SPEC_LOOP (uint16_t, uint8_t) + +SPEC_LOOP (uint32_t, uint8_t) +SPEC_LOOP (uint32_t, uint16_t) + +SPEC_LOOP (uint64_t, uint8_t) +SPEC_LOOP (uint64_t, uint16_t) +SPEC_LOOP (uint64_t, uint32_t) + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "Speculative loop mask load/stores not supported" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_7.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_7.c new file mode 100644 index 00000000000..0c2d62387e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_7.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with a load and a test. */ + +uint32_t +search (uint32_t *array) +{ + for (;;) + { + uint32_t x = *array++ >> 7; + if (x >= 200) + return x; + } +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-assembler-times {\tbrka\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b} 1 } } */ +/* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_8.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_8.c new file mode 100644 index 00000000000..8c70e2f9012 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_8.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-inline -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with a load which requires multiple copies and a test. 
*/ + +uint32_t +search (uint64_t *array) +{ + for (;;) + { + uint32_t x = *array++ >> 7; + if (x >= 200) + return x; + } +} + +/* { dg-final { scan-tree-dump "multiple copies not supported for speculative loops" "vect" } } */ +/* { dg-final { scan-tree-dump "not vectorized: relevant stmt not supported" "vect" } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_9.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_9.c new file mode 100644 index 00000000000..c21b44614c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_9.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -fdump-tree-vect-details -msve-vector-bits=256" } */ + +#include <stdint.h> + +/* Speculative loop with two loads which cannot both be aligned. */ + +#ifndef STRIDE_LEVEL +#define STRIDE_LEVEL 1 +#endif + +#define SPEC_LOOP(DATATYPE, ARGTYPE)\ +ARGTYPE spec_loop_##DATATYPE##_##ARGTYPE (DATATYPE *a, DATATYPE*b, DATATYPE n)\ +{\ + ARGTYPE i = -1;\ + do\ + i += 1;\ + while (a[i*STRIDE_LEVEL] + b[i*STRIDE_LEVEL] < n);\ + return i;\ +} + +/* TODO: Cannot yet vectorize due to gather load. 
*/ +SPEC_LOOP (int8_t, int8_t) +SPEC_LOOP (int16_t, int16_t) + +SPEC_LOOP (int32_t, int32_t) +SPEC_LOOP (int64_t, int64_t) +SPEC_LOOP (float, int32_t) +SPEC_LOOP (double, int64_t) + + +/* { dg-final { scan-tree-dump-times "loop versioned for vectorization to enhance alignment" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_speculative_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_speculative_9_run.c new file mode 100644 index 00000000000..f9470020fd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_speculative_9_run.c @@ -0,0 +1,67 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve" } */ +/* { dg-options "-O3 -fno-common -ffast-math -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_speculative_9.c" + +extern void abort (void); + +#ifndef MAX_ARRAY_SIZE +#define MAX_ARRAY_SIZE 500 +#endif + +#ifndef FILL_DATA +#define FILL_DATA 0 +#endif + +#ifndef EXIT_CONDITION +#define EXIT_CONDITION 5 +#endif + +#ifndef LOOP_COUNTS +#define LOOP_COUNTS {37,45,55,17,39,43} +#endif +int loop_counts[] = LOOP_COUNTS; + +/* Fill the arrays with the exit conditions. + Then refill at the correct strided accesses with fill data up to the end of + the loop count. 
*/ + +#define TEST_SPEC_LOOP_FUNC(DATATYPE, ARGTYPE)\ +void test_spec_loop_##DATATYPE##_##ARGTYPE (ARGTYPE num_elements)\ +{\ + DATATYPE a[MAX_ARRAY_SIZE];\ + DATATYPE b[MAX_ARRAY_SIZE];\ + int i;\ + for (i=0; i<MAX_ARRAY_SIZE; i++)\ + {\ + a[i] = EXIT_CONDITION;\ + b[i] = EXIT_CONDITION;\ + }\ + for (i=0; (i<num_elements-1)*STRIDE_LEVEL; i++)\ + {\ + a[i*STRIDE_LEVEL] = FILL_DATA;\ + b[i*STRIDE_LEVEL] = FILL_DATA;\ + }\ + ARGTYPE ret = spec_loop_##DATATYPE##_##ARGTYPE (a, b, EXIT_CONDITION);\ + if (ret != num_elements - 1)\ + abort ();\ +} + +TEST_SPEC_LOOP_FUNC (int8_t, int8_t) +TEST_SPEC_LOOP_FUNC (int16_t, int16_t) +TEST_SPEC_LOOP_FUNC (int32_t, int32_t) +TEST_SPEC_LOOP_FUNC (int64_t, int64_t) +TEST_SPEC_LOOP_FUNC (float, int32_t) +TEST_SPEC_LOOP_FUNC (double, int64_t) + +int main (void) +{ + test_spec_loop_int8_t_int8_t (loop_counts[0]); + test_spec_loop_int16_t_int16_t (loop_counts[1]); + test_spec_loop_int32_t_int32_t (loop_counts[2]); + test_spec_loop_int64_t_int64_t (loop_counts[3]); + test_spec_loop_float_int32_t (loop_counts[4]); + test_spec_loop_double_int64_t (loop_counts[5]); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_1.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_1.c new file mode 100644 index 00000000000..fffcaca53db --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE, VAL) \ + void \ + double_loop_##TYPE (TYPE *x) \ + { \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL; \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL; \ + consumer (x); \ + } + +TEST_LOOP (uint16_t, 511); +TEST_LOOP (uint32_t, 511); +TEST_LOOP (uint64_t, 511); + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, #511\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, #511\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tmov\tz[0-9]+\.d, #511\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_2.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_2.c new file mode 100644 index 00000000000..f09797c8a23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_2.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE) \ + void \ + multi_loop_##TYPE (TYPE *x, TYPE val) \ + { \ + for (int i = 0; i < 7; ++i) \ + x[i] += val; \ + consumer (x); \ + for (int i = 0; i < 7; ++i) \ + x[i] += val; \ + consumer (x); \ + for (int i = 0; i < 7; ++i) \ + x[i] += val; \ + consumer (x); \ + } + +/* One iteration is enough. */ +TEST_LOOP (uint8_t); +TEST_LOOP (uint16_t); +/* Two iterations are enough. Complete unrolling makes sense + even at -O2. */ +TEST_LOOP (uint32_t); +/* Four iterations are needed; ought to stay a loop. 
*/ +TEST_LOOP (uint64_t); + +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 3 } } */ +/* { dg-final { scan-assembler {\twhilelo\tp[0-9]\.s} } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.d} 6 } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_3.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_3.c new file mode 100644 index 00000000000..e8d113a65a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_3.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE) \ + void \ + multi_loop_##TYPE (TYPE *x, TYPE val1, TYPE val2, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + x[i * 2] += val1; \ + x[i * 2 + 1] += val2; \ + } \ + consumer (x); \ + for (int i = 0; i < n; ++i) \ + { \ + x[i * 2] += val1; \ + x[i * 2 + 1] += val2; \ + } \ + consumer (x); \ + for (int i = 0; i < n; ++i) \ + { \ + x[i * 2] += val1; \ + x[i * 2 + 1] += val2; \ + } \ + consumer (x); \ + } + +/* One iteration is enough. */ +TEST_LOOP (uint8_t); +TEST_LOOP (uint16_t); +/* Two iterations are enough. Complete unrolling makes sense + even at -O2. */ +TEST_LOOP (uint32_t); +/* Four iterations are needed; ought to stay a loop. 
*/ +TEST_LOOP (uint64_t); + +/* { dg-final { scan-assembler {\tld1b\tz[0-9]\.b} } } */ +/* { dg-final { scan-assembler {\tld1h\tz[0-9]\.h} } } */ +/* { dg-final { scan-assembler {\tld1w\tz[0-9]\.s} } } */ +/* { dg-final { scan-assembler {\tld1d\tz[0-9]\.d} } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_4.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_4.c new file mode 100644 index 00000000000..1229dd43043 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_4.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE, VAL) \ + void \ + multi_loop_##TYPE (TYPE *x) \ + { \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL; \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL; \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL; \ + consumer (x); \ + } + +TEST_LOOP (uint16_t, 0x1234); +TEST_LOOP (uint32_t, 0x12345); +TEST_LOOP (uint64_t, 0x123456); + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h,} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_5.c 
b/gcc/testsuite/gcc.target/aarch64/sve_spill_5.c new file mode 100644 index 00000000000..05198fcec65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_5.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE, VAL) \ + void \ + multi_loop_##TYPE (TYPE *x) \ + { \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + consumer (x); \ + } + +TEST_LOOP (uint8_t, 3); +TEST_LOOP (uint16_t, 4); +TEST_LOOP (uint32_t, 5); +TEST_LOOP (uint64_t, 6); +TEST_LOOP (float, 2.5f); +TEST_LOOP (double, 3.5); + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\..,} 18 } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_6.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_6.c new file mode 100644 index 00000000000..b349b84a4a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_6.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE, VAL) \ + void \ + multi_loop_##TYPE (TYPE *x1, TYPE *x2, TYPE *x3, TYPE *x4, int which) \ + { \ + if (which) \ + { \ + for (int i = 0; i < 7; ++i) \ + x1[i] += VAL; \ + consumer (x1); \ + for (int i = 0; i < 7; ++i) \ + x2[i] -= VAL; \ + consumer (x2); \ + } \ + else \ + { \ + for (int i = 0; i < 7; ++i) \ + x3[i] &= VAL; \ + consumer (x3); \ + } \ + for (int i = 0; i < 7; ++i) \ + x4[i] |= VAL; \ + consumer (x4); \ + } + +TEST_LOOP (uint8_t, 0x12); 
+TEST_LOOP (uint16_t, 0x1234); +TEST_LOOP (uint32_t, 0x12345); +TEST_LOOP (uint64_t, 0x123456); + +/* { dg-final { scan-assembler {\tld1b\tz[0-9]+\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\tz[0-9]+\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+\.s,} } } */ +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+\.d,} } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_spill_7.c b/gcc/testsuite/gcc.target/aarch64/sve_spill_7.c new file mode 100644 index 00000000000..38b061f4cc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_spill_7.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */ + +#include <stdint.h> + +void consumer (void *); + +#define TEST_LOOP(TYPE, VAL) \ + void \ + multi_loop_##TYPE (TYPE *x, int n) \ + { \ + for (int k = 0; k < 4; ++k) \ + { \ + for (int j = 0; j < n; ++j) \ + { \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + asm volatile (""); \ + } \ + for (int j = 0; j < n; ++j) \ + consumer (x); \ + for (int j = 0; j < n; ++j) \ + { \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + asm volatile (""); \ + } \ + consumer (x); \ + for (int i = 0; i < 100; ++i) \ + x[i] += VAL + i; \ + consumer (x); \ + } \ + } + +TEST_LOOP (uint8_t, 3); +TEST_LOOP (uint16_t, 4); +TEST_LOOP (uint32_t, 5); +TEST_LOOP (uint64_t, 6); +TEST_LOOP (float, 2.5f); +TEST_LOOP (double, 3.5); + +/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\..,} 18 } } */ +/* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ +/* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c new file mode 100644 index 00000000000..3e7367cd9fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_store_scalar_offset_1.c @@ -0,0 +1,55 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int8_t v32qi __attribute__((vector_size(32))); + +void sve_store_64_z_lsl (uint64_t *a, unsigned long i) +{ + asm volatile ("" : "=w" (*(v4di *) &a[i])); +} + +void sve_store_64_s_lsl (int64_t *a, signed long i) +{ + asm volatile ("" : "=w" (*(v4di *) &a[i])); +} + +void sve_store_32_z_lsl (uint32_t *a, unsigned long i) +{ + asm volatile ("" : "=w" (*(v8si *) &a[i])); +} + +void sve_store_32_s_lsl (int32_t *a, signed long i) +{ + asm volatile ("" : "=w" (*(v8si *) &a[i])); +} + +void sve_store_16_z_lsl (uint16_t *a, unsigned long i) +{ + asm volatile ("" : "=w" (*(v16hi *) &a[i])); +} + +void sve_store_16_s_lsl (int16_t *a, signed long i) +{ + asm volatile ("" : "=w" (*(v16hi *) &a[i])); +} + +/* ??? The other argument order leads to a redundant move. 
*/ +void sve_store_8_z (unsigned long i, uint8_t *a) +{ + asm volatile ("" : "=w" (*(v32qi *) &a[i])); +} + +void sve_store_8_s (signed long i, int8_t *a) +{ + asm volatile ("" : "=w" (*(v32qi *) &a[i])); +} + +/* { dg-final { scan-assembler-times {\tst1d\tz0\.d, p[0-7], \[x0, x1, lsl 3\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz0\.s, p[0-7], \[x0, x1, lsl 2\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz0\.h, p[0-7], \[x0, x1, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz0\.b, p[0-7], \[x1, x0\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c new file mode 100644 index 00000000000..bb23f9886c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_1.c @@ -0,0 +1,116 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef char v32qi __attribute__((vector_size(32))); +typedef struct { v32qi a[2]; } v64qi; + +typedef short v16hi __attribute__((vector_size(32))); +typedef struct { v16hi a[2]; } v32hi; + +typedef int v8si __attribute__((vector_size(32))); +typedef struct { v8si a[2]; } v16si; + +typedef long v4di __attribute__((vector_size(32))); +typedef struct { v4di a[2]; } v8di; + +typedef float v8sf __attribute__((vector_size(32))); +typedef struct { v8sf a[2]; } v16sf; + +typedef double v4df __attribute__((vector_size(32))); +typedef struct { v4df a[2]; } v8df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f1_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } \ + /* This must compile, but we don't care how. 
*/ \ + void \ + f2_##TYPE (TYPE *a) \ + { \ + TYPE x = a[0]; \ + x.a[0][3] = 1; \ + x.a[1][2] = 12; \ + asm volatile ("# %0" :: "w" (x)); \ + } \ + void \ + f3_##TYPE (TYPE *a, int i) \ + { \ + TYPE x = a[0]; \ + x.a[0][i] = 1; \ + asm volatile ("# %0" :: "w" (x)); \ + } \ + void \ + f4_##TYPE (TYPE *a, int i, int j) \ + { \ + TYPE x = a[0]; \ + x.a[i][j] = 44; \ + asm volatile ("# %0" :: "w" (x)); \ + } + +TEST_TYPE (v64qi, z0, z2) +TEST_TYPE (v32hi, z5, z7) +TEST_TYPE (v16si, z10, z12) +TEST_TYPE (v8di, z15, z17) +TEST_TYPE (v16sf, z20, z23) +TEST_TYPE (v8df, z28, z30) + +/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v64qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z1.d\n} } } */ +/* { dg-final { scan-assembler { test v64qi 2 z0, z0, z2\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1h\tz5.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v32hi 1 z5\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz7.d, z5.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz8.d, z6.d\n} } } */ +/* { dg-final { scan-assembler { test v32hi 2 z5, z5, z7\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz5.h, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz10.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz11.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v16si 1 z10\n} } } */ +/* { dg-final { scan-assembler 
{\tmov\tz12.d, z10.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z11.d\n} } } */ +/* { dg-final { scan-assembler { test v16si 2 z10, z10, z12\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz10.s, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz11.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1d\tz15.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v8di 1 z15\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z15.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z16.d\n} } } */ +/* { dg-final { scan-assembler { test v8di 2 z15, z15, z17\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz15.d, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v16sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ +/* { dg-final { scan-assembler { test v16sf 2 z20, z20, z23\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz29.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v8df 1 z28\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z28.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z29.d\n} } } */ +/* { dg-final { scan-assembler { test v8df 2 z28, z28, z30\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, 
#2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz29.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c new file mode 100644 index 00000000000..d36aa75483a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_2.c @@ -0,0 +1,111 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef char v32qi __attribute__((vector_size(32))); +typedef struct { v32qi a[3]; } v96qi; + +typedef short v16hi __attribute__((vector_size(32))); +typedef struct { v16hi a[3]; } v48hi; + +typedef int v8si __attribute__((vector_size(32))); +typedef struct { v8si a[3]; } v24si; + +typedef long v4di __attribute__((vector_size(32))); +typedef struct { v4di a[3]; } v12di; + +typedef float v8sf __attribute__((vector_size(32))); +typedef struct { v8sf a[3]; } v24sf; + +typedef double v4df __attribute__((vector_size(32))); +typedef struct { v4df a[3]; } v12df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } + +TEST_TYPE (v96qi, z0, z3) +TEST_TYPE (v48hi, z6, z2) +TEST_TYPE (v24si, z12, z15) +TEST_TYPE (v12di, z16, z13) +TEST_TYPE (v24sf, z20, z23) +TEST_TYPE (v12df, z26, z29) + +/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v96qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z1.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, 
z2.d\n} } } */ +/* { dg-final { scan-assembler { test v96qi 2 z0, z0, z3\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v48hi 1 z6\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ +/* { dg-final { scan-assembler { test v48hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz12.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v24si 1 z12\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz15.d, z12.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z13.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z14.d\n} } } */ +/* { dg-final { scan-assembler { test v24si 2 z12, z12, z15\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { 
scan-assembler {\tld1d\tz16.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz17.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v12di 1 z16\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z16.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz14.d, z17.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz15.d, z18.d\n} } } */ +/* { dg-final { scan-assembler { test v12di 2 z16, z16, z13\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz16.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v24sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz23.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz24.d, z21.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz25.d, z22.d\n} } } */ +/* { dg-final { scan-assembler { test v24sf 2 z20, z20, z23\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz28.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v12df 1 z26\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz29.d, 
z26.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z27.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z28.d\n} } } */ +/* { dg-final { scan-assembler { test v12df 2 z26, z26, z29\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz27.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz28.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c new file mode 100644 index 00000000000..d97d6973359 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_move_3.c @@ -0,0 +1,129 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef char v32qi __attribute__((vector_size(32))); +typedef struct { v32qi a[4]; } v128qi; + +typedef short v16hi __attribute__((vector_size(32))); +typedef struct { v16hi a[4]; } v64hi; + +typedef int v8si __attribute__((vector_size(32))); +typedef struct { v8si a[4]; } v32si; + +typedef long v4di __attribute__((vector_size(32))); +typedef struct { v4di a[4]; } v16di; + +typedef float v8sf __attribute__((vector_size(32))); +typedef struct { v8sf a[4]; } v32sf; + +typedef double v4df __attribute__((vector_size(32))); +typedef struct { v4df a[4]; } v16df; + +#define TEST_TYPE(TYPE, REG1, REG2) \ + void \ + f_##TYPE (TYPE *a) \ + { \ + register TYPE x asm (#REG1) = a[0]; \ + asm volatile ("# test " #TYPE " 1 %0" :: "w" (x)); \ + register TYPE y asm (#REG2) = x; \ + asm volatile ("# test " #TYPE " 2 %0, %1, %2" \ + : "=&w" (x) : "0" (x), "w" (y)); \ + a[1] = x; \ + } + +TEST_TYPE (v128qi, z0, z4) +TEST_TYPE (v64hi, z6, z2) +TEST_TYPE (v32si, z12, z16) +TEST_TYPE (v16di, z17, z13) +TEST_TYPE (v32sf, z20, z16) +TEST_TYPE (v16df, z24, z28) + +/* { dg-final { scan-assembler {\tld1b\tz0.b, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler 
{\tld1b\tz1.b, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1b\tz2.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1b\tz3.b, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v128qi 1 z0\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z0.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, z1.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz6.d, z2.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz7.d, z3.d\n} } } */ +/* { dg-final { scan-assembler { test v128qi 2 z0, z0, z4\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz0.b, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz1.b, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz2.b, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1b\tz3.b, p[0-7], \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1h\tz6.h, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz7.h, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz8.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1h\tz9.h, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v64hi 1 z6\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz2.d, z6.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz3.d, z7.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz4.d, z8.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz5.d, z9.d\n} } } */ +/* { dg-final { scan-assembler { test v64hi 2 z6, z6, z2\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz6.h, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz7.h, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz8.h, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1h\tz9.h, p[0-7], \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { 
scan-assembler {\tld1w\tz12.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz13.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz14.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz15.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v32si 1 z12\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z12.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z13.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z14.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz19.d, z15.d\n} } } */ +/* { dg-final { scan-assembler { test v32si 2 z12, z12, z16\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz12.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz13.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz14.s, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz15.s, p[0-7], \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1d\tz17.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz18.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz19.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz20.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v16di 1 z17\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz13.d, z17.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz14.d, z18.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz15.d, z19.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ +/* { dg-final { scan-assembler { test v16di 2 z17, z17, z13\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz17.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz18.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz19.d, p[0-7], \[x0, 
#6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz20.d, p[0-7], \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1w\tz20.s, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz21.s, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz22.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1w\tz23.s, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v32sf 1 z20\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz16.d, z20.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz17.d, z21.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz18.d, z22.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz19.d, z23.d\n} } } */ +/* { dg-final { scan-assembler { test v32sf 2 z20, z20, z16\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz20.s, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz21.s, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz22.s, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1w\tz23.s, p[0-7], \[x0, #7, mul vl\]\n} } } */ + +/* { dg-final { scan-assembler {\tld1d\tz24.d, p[0-7]/z, \[x0\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz25.d, p[0-7]/z, \[x0, #1, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz26.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tld1d\tz27.d, p[0-7]/z, \[x0, #3, mul vl\]\n} } } */ +/* { dg-final { scan-assembler { test v16df 1 z24\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz28.d, z24.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz29.d, z25.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz30.d, z26.d\n} } } */ +/* { dg-final { scan-assembler {\tmov\tz31.d, z27.d\n} } } */ +/* { dg-final { scan-assembler { test v16df 2 z24, z24, z28\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz24.d, p[0-7], \[x0, #4, mul vl\]\n} } } */ +/* { dg-final { 
scan-assembler {\tst1d\tz25.d, p[0-7], \[x0, #5, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz26.d, p[0-7], \[x0, #6, mul vl\]\n} } } */ +/* { dg-final { scan-assembler {\tst1d\tz27.d, p[0-7], \[x0, #7, mul vl\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c new file mode 100644 index 00000000000..6d7b5fecbce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1.c @@ -0,0 +1,89 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#ifndef TYPE +#define TYPE unsigned char +#endif + +#ifndef NAME +#define NAME(X) X +#endif + +#define N 1024 + +void __attribute__ ((noinline, noclone)) +NAME(f2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c) +{ + for (int i = 0; i < N; ++i) + { + a[i] = c[i * 2]; + b[i] = c[i * 2 + 1]; + } +} + +void __attribute__ ((noinline, noclone)) +NAME(f3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d) +{ + for (int i = 0; i < N; ++i) + { + a[i] = d[i * 3]; + b[i] = d[i * 3 + 1]; + c[i] = d[i * 3 + 2]; + } +} + +void __attribute__ ((noinline, noclone)) +NAME(f4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d, TYPE *__restrict e) +{ + for (int i = 0; i < N; ++i) + { + a[i] = e[i * 4]; + b[i] = e[i * 4 + 1]; + c[i] = e[i * 4 + 2]; + d[i] = e[i * 4 + 3]; + } +} + +void __attribute__ ((noinline, noclone)) +NAME(g2) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c) +{ + for (int i = 0; i < N; ++i) + { + c[i * 2] = a[i]; + c[i * 2 + 1] = b[i]; + } +} + +void __attribute__ ((noinline, noclone)) +NAME(g3) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d) +{ + for (int i = 0; i < N; ++i) + { + d[i * 3] = a[i]; + d[i * 3 + 1] = b[i]; + d[i * 3 + 2] = c[i]; + } +} + +void __attribute__ ((noinline, noclone)) +NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict 
c, + TYPE *__restrict d, TYPE *__restrict e) +{ + for (int i = 0; i < N; ++i) + { + e[i * 4] = a[i]; + e[i * 4 + 1] = b[i]; + e[i * 4 + 2] = c[i]; + e[i * 4 + 3] = d[i]; + } +} + +/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c new file mode 100644 index 00000000000..7ae718ada2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned long +#define ITYPE long +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c new file mode 100644 index 00000000000..5ab3ff68bda --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_10_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned long +#define ITYPE long +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c new file mode 100644 index 00000000000..6771938131b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE float +#define ITYPE int +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c new file mode 100644 index 00000000000..f9c129801fc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_11_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE float +#define ITYPE int +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c new file mode 100644 index 00000000000..37c11b3b29a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12.c 
@@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#define ITYPE long +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c new file mode 100644 index 00000000000..c7ed3fe2806 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_12_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#define ITYPE long +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c new file mode 100644 index 00000000000..3e3b9d733e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_13.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define TYPE unsigned char +#define NAME(X) qi_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned short +#define NAME(X) hi_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned int +#define NAME(X) si_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE unsigned long +#define NAME(X) di_##X 
+#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE float +#define NAME(X) sf_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +#define TYPE double +#define NAME(X) df_##X +#include "sve_struct_vect_1.c" +#undef NAME +#undef TYPE + +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c new file mode 100644 index 00000000000..c3e81f500e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_14.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=512" } */ + +#include "sve_struct_vect_13.c" + +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c new file mode 100644 index 00000000000..635910e11a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_15.c 
@@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=1024" } */ + +#include "sve_struct_vect_13.c" + +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c new file mode 100644 index 00000000000..9afc0708fb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_16.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=2048" } */ + +#include "sve_struct_vect_13.c" + +/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times 
{\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c new file mode 100644 index 00000000000..80c99961791 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ 
+ +#define N 2000 + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src) \ + { \ + for (int i = 0; i < N; ++i) \ + dest[i] += src[i * 2]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* The only branches should be in the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c new file mode 100644 index 00000000000..970c6de6f08 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_17_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_17.c" + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 2]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 2; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2 + in[i * 2]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c new file mode 100644 index 00000000000..90e0b53c7df --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define N 2000 + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src) \ + { \ + for (int i = 0; i < N; ++i) \ + dest[i] += src[i * 4]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* The only branches should be in the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c new file mode 100644 index 00000000000..f7db5aea413 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_18_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_18.c" + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 4]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 4; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2 + in[i * 4]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c new file mode 100644 index 00000000000..3430459a2f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * 2]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld2d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c new file mode 100644 index 00000000000..94593cef684 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_19_run.c @@ -0,0 +1,41 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_19.c" + +volatile int x; + +#define N 1000 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 2]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; ++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 2; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 2]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c new file mode 100644 index 00000000000..1f99c676586 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_1_run.c @@ -0,0 +1,65 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_1.c" + +extern void abort() __attribute__((noreturn)); + +TYPE a[N], b[N], c[N], d[N], e[N * 4]; + +void __attribute__ ((noinline, noclone)) +init_array (TYPE *array, int n, TYPE base, TYPE step) +{ + for (int i = 0; i < n; ++i) + array[i] = base + step * i; +} + +void __attribute__ ((noinline, noclone)) +check_array (TYPE *array, int n, TYPE base, TYPE step) +{ + for (int i = 0; i < n; ++i) + if (array[i] != (TYPE) (base + step * i)) + abort (); +} + +int +main 
(void) +{ + init_array (e, 2 * N, 11, 5); + f2 (a, b, e); + check_array (a, N, 11, 10); + check_array (b, N, 16, 10); + + init_array (e, 3 * N, 7, 6); + f3 (a, b, c, e); + check_array (a, N, 7, 18); + check_array (b, N, 13, 18); + check_array (c, N, 19, 18); + + init_array (e, 4 * N, 4, 11); + f4 (a, b, c, d, e); + check_array (a, N, 4, 44); + check_array (b, N, 15, 44); + check_array (c, N, 26, 44); + check_array (d, N, 37, 44); + + init_array (a, N, 2, 8); + init_array (b, N, 6, 8); + g2 (a, b, e); + check_array (e, 2 * N, 2, 4); + + init_array (a, N, 4, 15); + init_array (b, N, 9, 15); + init_array (c, N, 14, 15); + g3 (a, b, c, e); + check_array (e, 3 * N, 4, 5); + + init_array (a, N, 14, 36); + init_array (b, N, 23, 36); + init_array (c, N, 32, 36); + init_array (d, N, 41, 36); + g4 (a, b, c, d, e); + check_array (e, 4 * N, 14, 9); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c new file mode 100644 index 00000000000..8e5a96361f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned short +#include "sve_struct_vect_1.c" + +/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c new file 
mode 100644 index 00000000000..aad0e104379 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * 4]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld4d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. 
*/ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c new file mode 100644 index 00000000000..3be63364455 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_20_run.c @@ -0,0 +1,41 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_20.c" + +volatile int x; + +#define N 1000 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 4]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; ++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 4; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 4]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c new file mode 100644 index 00000000000..ac3a7dd2383 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define N 2000 + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src) \ + { \ + for (int i = 0; i < N; ++i) \ + dest[i] += src[i * 3]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. */ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* The only branches should be in the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c new file mode 100644 index 00000000000..94d72d1835a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_21_run.c @@ -0,0 +1,32 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_21.c" + +volatile int x; + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 3]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 3; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2 + in[i * 3]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c new file mode 100644 index 00000000000..c17766c7d23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TEST_LOOP(NAME, TYPE) \ + void __attribute__((weak)) \ + NAME (TYPE *restrict dest, TYPE *restrict src, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[i] += src[i * 3]; \ + } + +#define TEST(NAME) \ + TEST_LOOP (NAME##_i8, signed char) \ + TEST_LOOP (NAME##_i16, unsigned short) \ + TEST_LOOP (NAME##_f32, float) \ + TEST_LOOP (NAME##_f64, double) + +TEST (test) + +/* Check the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1b\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1h\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\t} 1 } } */ + +/* Check the scalar tail. */ +/* { dg-final { scan-assembler-times {\tldrb\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrb\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldrh\tw} 2 } } */ +/* { dg-final { scan-assembler-times {\tstrh\tw} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + +/* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. 
*/ +/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c new file mode 100644 index 00000000000..550364b16d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_22_run.c @@ -0,0 +1,41 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_22.c" + +volatile int x; + +#define N 1000 + +#undef TEST_LOOP +#define TEST_LOOP(NAME, TYPE) \ + { \ + TYPE out[N]; \ + TYPE in[N * 3]; \ + int counts[] = { 0, 1, N - 1 }; \ + for (int j = 0; j < 3; ++j) \ + { \ + int count = counts[j]; \ + for (int i = 0; i < N; ++i) \ + out[i] = i * 7 / 2; \ + for (int i = 0; i < N * 3; ++i) \ + in[i] = i * 9 / 2; \ + NAME (out, in, count); \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 7 / 2; \ + if (i < count) \ + expected += in[i * 3]; \ + if (out[i] != expected) \ + __builtin_abort (); \ + x += 1; \ + } \ + } \ + } + +int +main (void) +{ + TEST (test); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c new file mode 100644 index 00000000000..6229b78b72e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_2_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned short +#include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c new file mode 100644 index 00000000000..3a29ae16701 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned int +#include "sve_struct_vect_1.c" + +/* { dg-final { 
scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c new file mode 100644 index 00000000000..7703dc6c043 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_3_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned int +#include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c new file mode 100644 index 00000000000..0c526365829 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned long +#include "sve_struct_vect_1.c" + +/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], 
\[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c new file mode 100644 index 00000000000..4ea2cff9dd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_4_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned long +#include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c new file mode 100644 index 00000000000..efc1c9d2e2c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE float +#include "sve_struct_vect_1.c" + +/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c new file mode 100644 index 00000000000..f0d56e87dcc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_5_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE float +#include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c new file mode 100644 index 00000000000..ff445c1fbb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#include "sve_struct_vect_1.c" + +/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4d\t{z[0-9]+.d - z[0-9]+.d}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c new file mode 100644 index 00000000000..b0b685c0789 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_6_run.c @@ -0,0 +1,5 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#include "sve_struct_vect_1_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c new file mode 100644 index 00000000000..9712f89d171 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7.c @@ -0,0 +1,84 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#ifndef TYPE +#define TYPE unsigned char +#define ITYPE signed char +#endif + +void __attribute__ ((noinline, noclone)) +f2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + a[i] = c[i * 2]; + b[i] = c[i * 2 + 1]; 
+ } +} + +void __attribute__ ((noinline, noclone)) +f3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + a[i] = d[i * 3]; + b[i] = d[i * 3 + 1]; + c[i] = d[i * 3 + 2]; + } +} + +void __attribute__ ((noinline, noclone)) +f4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d, TYPE *__restrict e, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + a[i] = e[i * 4]; + b[i] = e[i * 4 + 1]; + c[i] = e[i * 4 + 2]; + d[i] = e[i * 4 + 3]; + } +} + +void __attribute__ ((noinline, noclone)) +g2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + c[i * 2] = a[i]; + c[i * 2 + 1] = b[i]; + } +} + +void __attribute__ ((noinline, noclone)) +g3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + d[i * 3] = a[i]; + d[i * 3 + 1] = b[i]; + d[i * 3 + 2] = c[i]; + } +} + +void __attribute__ ((noinline, noclone)) +g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + TYPE *__restrict d, TYPE *__restrict e, ITYPE n) +{ + for (ITYPE i = 0; i < n; ++i) + { + e[i * 4] = a[i]; + e[i * 4 + 1] = b[i]; + e[i * 4 + 2] = c[i]; + e[i * 4 + 3] = d[i]; + } +} + +/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c 
b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c new file mode 100644 index 00000000000..5cfb7559a5c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_7_run.c @@ -0,0 +1,67 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_struct_vect_7.c" + +#define N 93 + +extern void abort() __attribute__((noreturn)); + +TYPE a[N], b[N], c[N], d[N], e[N * 4]; + +void __attribute__ ((noinline, noclone)) +init_array (TYPE *array, int n, TYPE base, TYPE step) +{ + for (int i = 0; i < n; ++i) + array[i] = base + step * i; +} + +void __attribute__ ((noinline, noclone)) +check_array (TYPE *array, int n, TYPE base, TYPE step) +{ + for (int i = 0; i < n; ++i) + if (array[i] != (TYPE) (base + step * i)) + abort (); +} + +int +main (void) +{ + init_array (e, 2 * N, 11, 5); + f2 (a, b, e, N); + check_array (a, N, 11, 10); + check_array (b, N, 16, 10); + + init_array (e, 3 * N, 7, 6); + f3 (a, b, c, e, N); + check_array (a, N, 7, 18); + check_array (b, N, 13, 18); + check_array (c, N, 19, 18); + + init_array (e, 4 * N, 4, 11); + f4 (a, b, c, d, e, N); + check_array (a, N, 4, 44); + check_array (b, N, 15, 44); + check_array (c, N, 26, 44); + check_array (d, N, 37, 44); + + init_array (a, N, 2, 8); + init_array (b, N, 6, 8); + g2 (a, b, e, N); + check_array (e, 2 * N, 2, 4); + + init_array (a, N, 4, 15); + init_array (b, N, 9, 15); + init_array (c, N, 14, 15); + g3 (a, b, c, e, N); + check_array (e, 3 * N, 4, 5); + + init_array (a, N, 14, 36); + init_array (b, N, 23, 36); + init_array (c, N, 32, 36); + init_array (d, N, 41, 36); + g4 (a, b, c, d, e, N); + check_array (e, 4 * N, 14, 9); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c new file mode 100644 index 00000000000..57cb93de5d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ 
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned short +#define ITYPE short +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c new file mode 100644 index 00000000000..59005a2f05b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_8_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned short +#define ITYPE short +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c new file mode 100644 index 00000000000..d897d556d05 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned int +#define ITYPE int +#include "sve_struct_vect_7.c" + +/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, 
p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst3w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {\tst4w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+\]\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c new file mode 100644 index 00000000000..ab694b4a971 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_struct_vect_9_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE unsigned int +#define ITYPE int +#include "sve_struct_vect_7_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c b/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c new file mode 100644 index 00000000000..1d8dc76719d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_subr_1.c @@ -0,0 +1,64 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +#include <stdint.h> + +#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ +void vsubr_arithimm_##NAME##_##TYPE (TYPE *dst, int count) \ +{ \ + for (int i = 0; i < count; ++i) \ + dst[i] = VALUE - dst[i]; \ +} + +#define DO_ARITH_OPS(TYPE) \ + DO_IMMEDIATE_OPS (0, TYPE, 0); \ + DO_IMMEDIATE_OPS (5, TYPE, 5); \ + DO_IMMEDIATE_OPS (255, TYPE, 255); \ + DO_IMMEDIATE_OPS (256, TYPE, 256); \ + DO_IMMEDIATE_OPS (257, TYPE, 257); \ + DO_IMMEDIATE_OPS (65280, TYPE, 65280); \ + DO_IMMEDIATE_OPS (65281, TYPE, 65281); \ + DO_IMMEDIATE_OPS (-1, TYPE, minus1); + +DO_ARITH_OPS (int8_t) +DO_ARITH_OPS (int16_t) +DO_ARITH_OPS (int32_t) +DO_ARITH_OPS (int64_t) + +/* { dg-final { scan-assembler-not {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #257\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #65280\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} } } */ + +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} } } */ + +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} } } */ + +/* { dg-final { scan-assembler-times 
{\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #256\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #257\n} } } */ +/* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #65280\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #65281\n} } } */ +/* { dg-final { scan-assembler-not {\tsubr\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c new file mode 100644 index 00000000000..0c7b887d232 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_trn1_1.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#ifndef BIAS +#define BIAS 0 +#endif + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define MASK_2(X, Y) X, Y + X +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y) +#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y) +#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y) + +#define INDEX_4 v4di +#define INDEX_8 v8si +#define INDEX_16 v16hi +#define INDEX_32 v32qi + +#define PERMUTE(TYPE, NUNITS) \ + TYPE permute_##TYPE (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (BIAS, NUNITS) })); \ + } + +#define TEST_ALL(T) \ + T (v4di, 4) \ + T (v8si, 8) \ + T (v16hi, 16) 
\ + T (v32qi, 32) \ + T (v4df, 4) \ + T (v8sf, 8) \ + T (v16hf, 16) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c new file mode 100644 index 00000000000..6654781bbd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_trn2_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define BIAS 1 +#include "sve_trn1_1.c" + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\ttrn2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c new file mode 100644 index 00000000000..c415c4bf5d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +unpack_double_int_plus8 (double *d, int32_t *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = s[i] + 8; +} + +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c new file mode 100644 index 00000000000..f8d9cc2b2ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_signed_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_unpack_fcvt_signed_1.c" + +#define ARRAY_SIZE 89 + +#define VAL1 ((i * 88654) - (33 * 88654)) + +int __attribute__ ((optimize (1))) +main (void) +{ + double array_dest[ARRAY_SIZE]; + int32_t array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + unpack_double_int_plus8 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (double) (VAL1 + 8)) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c new file mode 100644 index 00000000000..fb9fe810cf9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +void __attribute__ ((noinline, noclone)) +unpack_double_int_plus9 (double *d, uint32_t *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = (double) (s[i] + 9); +} + +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c new file mode 100644 index 00000000000..93788a342ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_fcvt_unsigned_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_unpack_fcvt_unsigned_1.c" + +#define ARRAY_SIZE 153 + +#define VAL1 ((unsigned int) ((i * 345435) - (21 * 345435))) + +int __attribute__ ((optimize (1))) +main (void) +{ + double array_dest[ARRAY_SIZE]; + uint32_t array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + unpack_double_int_plus9 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (double) (VAL1 + 9)) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c new file mode 100644 index 00000000000..73c7a815e36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +void __attribute__ ((noinline, noclone)) +unpack_float_plus_7point9 (double *d, float *s, int size) +{ + for (int i = 0; i < size; i++) + d[i] = s[i] + 7.9; +} + +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfcvt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c new file mode 100644 index 00000000000..2a645b33d4b --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_unpack_float_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_unpack_float_1.c" + +#define ARRAY_SIZE 199 + +#define VAL1 ((float) ((i * 645.56665) - (645.56665))) + +int __attribute__ ((optimize (1))) +main (void) +{ + double array_dest[ARRAY_SIZE]; + float array_source[ARRAY_SIZE]; + + for (int i = 0; i < ARRAY_SIZE; i++) + { + array_source[i] = VAL1; + asm volatile ("" ::: "memory"); + } + + unpack_float_plus_7point9 (array_dest, array_source, ARRAY_SIZE); + for (int i = 0; i < ARRAY_SIZE; i++) + if (array_dest[i] != (double) (VAL1 + 7.9)) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c new file mode 100644 index 00000000000..4d345cf81e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define UNPACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ +} + +#define TEST_ALL(T) \ + T (int64_t, int32_t) \ + T (int32_t, int16_t) \ + T (int16_t, int8_t) \ + T (uint64_t, int32_t) \ + T (uint32_t, int16_t) \ + T (uint16_t, int8_t) + +TEST_ALL (UNPACK) + +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tsunpklo\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c new file mode 100644 index 00000000000..d183408d124 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_signed_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_unpack_signed_1.c" + +#define ARRAY_SIZE 33 + +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c new file mode 100644 index 00000000000..fa8de963264 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define UNPACK(TYPED, TYPES) \ +void __attribute__ ((noinline, noclone)) \ +unpack_##TYPED##_##TYPES (TYPED *d, TYPES *s, int size) \ +{ \ + for (int i = 0; i < size; i++) \ + d[i] = s[i] + 1; \ +} + +#define TEST_ALL(T) \ + T (int64_t, uint32_t) \ + T (int32_t, uint16_t) \ + T (int16_t, uint8_t) \ + T (uint64_t, uint32_t) \ + T (uint32_t, uint16_t) \ + T (uint16_t, uint8_t) + +TEST_ALL (UNPACK) + +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ +/* { 
dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.h, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c new file mode 100644 index 00000000000..3fa66220f17 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_unpack_unsigned_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_unpack_unsigned_1.c" + +#define ARRAY_SIZE 85 + +#define TEST_LOOP(TYPED, TYPES) \ + { \ + TYPED arrayd[ARRAY_SIZE]; \ + TYPES arrays[ARRAY_SIZE]; \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + { \ + arrays[i] = (i - 10) * 3; \ + asm volatile ("" ::: "memory"); \ + } \ + unpack_##TYPED##_##TYPES (arrayd, arrays, ARRAY_SIZE); \ + for (int i = 0; i < ARRAY_SIZE; i++) \ + if (arrayd[i] != (TYPED) ((TYPES) ((i - 10) * 3) + 1)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c new file mode 100644 index 00000000000..aaa4fdccbf0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf 
__attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define UZP1(TYPE, MASK) \ +TYPE uzp1_##TYPE (TYPE values1, TYPE values2) \ +{ \ + return __builtin_shuffle (values1, values2, MASK); \ +} + + +UZP1 (v4di, ((v4di) { 0, 2, 4, 6 })); +UZP1 (v8si, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); +UZP1 (v16hi, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 })); +UZP1 (v32qi, ((v32qi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62 })); +UZP1 (v4df, ((v4di) { 0, 2, 4, 6 })); +UZP1 (v8sf, ((v8si) { 0, 2, 4, 6, 8, 10, 12, 14 })); +UZP1 (v16hf, ((v16hi) { 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 })); + +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */ + +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c new file mode 100644 index 00000000000..d35dad0ffca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp1_1_run.c @@ -0,0 +1,63 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ + +#include "sve_uzp1_1.c" + +#define TEST_UZP1(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE 
values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + TYPE dest; \ + dest = uzp1_##TYPE (values1, values2); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_UZP1 (v4di, + ((v4di) { 4, 6, 12, 36 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 })); + TEST_UZP1 (v8si, + ((v8si) { 3, 5, 7, 9, 33, 35, 37, 39 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_UZP1 (v16hi, + ((v16hi) { 3, 5, 7, 9, 11, 13, 15, 17, + 33, 35, 37, 39, 41, 43, 45, 47 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_UZP1 (v32qi, + ((v32qi) { 4, 6, 4, 6, 4, 6, 4, 6, + 4, 6, 4, 6, 4, 6, 4, 6, + 12, 36, 12, 36, 12, 36, 12, 36, + 12, 36, 12, 36, 12, 36, 12, 36 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_UZP1 (v4df, + ((v4df) { 4.0, 6.0, 12.0, 36.0 }), + ((v4df) { 4.0, 5.0, 6.0, 7.0 }), + ((v4df) { 12.0, 24.0, 36.0, 48.0 })); + TEST_UZP1 (v8sf, + ((v8sf) { 3.0, 5.0, 7.0, 9.0, 33.0, 35.0, 37.0, 39.0 }), + ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), + ((v8sf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0 })); + TEST_UZP1 (v16hf, + ((v16hf) { 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, + 33.0, 35.0, 37.0, 39.0, 41.0, 43.0, 45.0, 47.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c new file mode 100644 index 
00000000000..1bb84d80eb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define UZP2(TYPE, MASK) \ +TYPE uzp2_##TYPE (TYPE values1, TYPE values2) \ +{ \ + return __builtin_shuffle (values1, values2, MASK); \ +} + +UZP2 (v4di, ((v4di) { 1, 3, 5, 7 })); +UZP2 (v8si, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); +UZP2 (v16hi, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 })); +UZP2 (v32qi, ((v32qi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31, + 33, 35, 37, 39, 41, 43, 45, 47, + 49, 51, 53, 55, 57, 59, 61, 63 })); +UZP2 (v4df, ((v4di) { 1, 3, 5, 7 })); +UZP2 (v8sf, ((v8si) { 1, 3, 5, 7, 9, 11, 13, 15 })); +UZP2 (v16hf, ((v16hi) { 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 })); + +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler-not {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} } } */ + +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.b, 
z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c new file mode 100644 index 00000000000..d7a241c1258 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_uzp2_1_run.c @@ -0,0 +1,63 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ + +#include "sve_uzp2_1.c" + +#define TEST_UZP2(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + TYPE dest; \ + dest = uzp2_##TYPE (values1, values2); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_UZP2 (v4di, + ((v4di) { 5, 7, 24, 48 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 })); + TEST_UZP2 (v8si, + ((v8si) { 4, 6, 8, 10, 34, 36, 38, 40 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_UZP2 (v16hi, + ((v16hi) { 4, 6, 8, 10, 12, 14, 16, 18, + 34, 36, 38, 40, 42, 44, 46, 48 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_UZP2 (v32qi, + ((v32qi) { 5, 7, 5, 7, 5, 7, 5, 7, + 5, 7, 5, 7, 5, 7, 5, 7, + 24, 48, 24, 48, 24, 48, 24, 48, + 24, 48, 24, 48, 24, 48, 24, 48 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_UZP2 (v4df, + ((v4df) { 5.0, 7.0, 24.0, 48.0 }), + ((v4df) { 4.0, 5.0, 6.0, 7.0 }), + ((v4df) { 12.0, 24.0, 36.0, 48.0 })); + TEST_UZP2 (v8sf, + ((v8sf) { 4.0, 6.0, 8.0, 10.0, 34.0, 36.0, 38.0, 40.0 }), + ((v8sf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0 }), + ((v8sf) { 33.0, 34.0, 35.0, 36.0, 
37.0, 38.0, 39.0, 40.0 })); + TEST_UZP2 (v16hf, + ((v16hf) { 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, + 34.0, 36.0, 38.0, 40.0, 42.0, 44.0, 46.0, 48.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.c new file mode 100644 index 00000000000..54342836fb7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (VF-1)*4 rather than (257-1)*4. */ +/* { dg-final { scan-assembler-not {, 1024} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ +/* Two range checks and a check for n being zero. 
*/ +/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.h b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.h new file mode 100644 index 00000000000..43b098ff413 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1.h @@ -0,0 +1,61 @@ +extern void abort (void) __attribute__ ((noreturn)); + +#define MARGIN 6 + +void __attribute__ ((weak, optimize ("no-tree-vectorize"))) +test (int n, int m, int offset) +{ + int abs_n = (n < 0 ? -n : n); + int abs_m = (m < 0 ? -m : m); + int max_i = (abs_n > abs_m ? abs_n : abs_m); + int abs_offset = (offset < 0 ? -offset : offset); + int size = MARGIN * 2 + max_i * SIZE + abs_offset; + TYPE *array = (TYPE *) __builtin_alloca (size * sizeof (TYPE)); + for (int i = 0; i < size; ++i) + array[i] = i; + int base_x = offset < 0 ? MARGIN - offset : MARGIN; + int base_y = offset < 0 ? MARGIN : MARGIN + offset; + int start_x = n < 0 ? base_x - n * (SIZE - 1) : base_x; + int start_y = m < 0 ? base_y - m * (SIZE - 1) : base_y; + f (&array[start_x], &array[start_y], n, m); + int j = 0; + int start = (n < 0 ? size - 1 : 0); + int end = (n < 0 ? -1 : size); + int inc = (n < 0 ? -1 : 1); + for (int i = start; i != end; i += inc) + { + if (j == SIZE || i != start_x + j * n) + { + if (array[i] != i) + abort (); + } + else if (n == 0) + { + TYPE sum = i; + for (; j < SIZE; j++) + { + int next_y = start_y + j * m; + if (n >= 0 ? next_y < i : next_y > i) + sum += array[next_y]; + else if (next_y == i) + sum += sum; + else + sum += next_y; + } + if (array[i] != sum) + abort (); + } + else + { + int next_y = start_y + j * m; + TYPE base = i; + if (n >= 0 ? 
next_y < i : next_y > i) + base += array[next_y]; + else + base += next_y; + if (array[i] != base) + abort (); + j += 1; + } + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1_run.c new file mode 100644 index 00000000000..5f06df428b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_1_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_1.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = 0; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c new file mode 100644 index 00000000000..958dce4262d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, unsigned short n, unsigned short m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (257-1)*4 rather than (VF-1)*4. */ +/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x[0-9]+, 10, 16} 2 } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) 
*/ +/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2_run.c new file mode 100644 index 00000000000..78a94c06aa1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_2_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_2.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = 0; n < 10; ++n) + for (int m = 0; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3.c new file mode 100644 index 00000000000..85479c4cb1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, int n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (VF-1)*4 rather than (257-1)*4. 
*/ +/* { dg-final { scan-assembler-not {, 1024} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3_run.c new file mode 100644 index 00000000000..517aae73064 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_3_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_3.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c new file mode 100644 index 00000000000..54d592d8ef1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, int n, int m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (257-1)*4 rather than (VF-1)*4. 
*/ +/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x[0-9]+, 10, 32} 2 } } */ +/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ +/* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4_run.c new file mode 100644 index 00000000000..e83e6689ab5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_4_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_4.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int m = -10; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5.c new file mode 100644 index 00000000000..a756ac34d07 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */ +/* Should multiply by (VF-1)*8 rather than (257-1)*8. 
*/ +/* { dg-final { scan-assembler-not {, 2048} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5_run.c new file mode 100644 index 00000000000..dbff8d0d946 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_5_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_5.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6.c new file mode 100644 index 00000000000..d9192da0645 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE long +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. 
*/ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 2 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6_run.c new file mode 100644 index 00000000000..3a95a935b9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_6_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_6.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int m = -10; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7.c new file mode 100644 index 00000000000..b8692df76ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE double +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. 
*/ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x1, 2048} 1 } } */ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7_run.c new file mode 100644 index 00000000000..40c4e24dc88 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_7_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_7.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, 1, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8.c new file mode 100644 index 00000000000..5497ac5546b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#define TYPE long +#define SIZE 257 + +void +f (TYPE *x, TYPE *y, long n __attribute__((unused)), long m) +{ + for (int i = 0; i < SIZE; ++i) + x[i] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. 
*/ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 2048} 1 } } */ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks only; doesn't matter whether n is zero. */ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8_run.c new file mode 100644 index 00000000000..ab513533329 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_var_stride_8_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_var_stride_8.c" +#include "sve_var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (1, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C new file mode 100644 index 00000000000..9be09546c80 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1.C @@ -0,0 +1,245 @@ +/* { dg-do assemble } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256 --save-temps" } */ + +#include <stdint.h> + +typedef int8_t v32qi __attribute__((vector_size(32))); +typedef int16_t v16hi __attribute__((vector_size(32))); +typedef int32_t v8si __attribute__((vector_size(32))); +typedef int64_t v4di __attribute__((vector_size(32))); + +typedef uint8_t v32qu __attribute__((vector_size(32))); +typedef uint16_t v16hu __attribute__((vector_size(32))); +typedef uint32_t v8su __attribute__((vector_size(32))); +typedef uint64_t v4du __attribute__((vector_size(32))); + +#define DEF_VCOND_VAR(TYPE, COND, SUFFIX) \ +TYPE 
vcond_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a, TYPE b) \ +{ \ + TYPE r; \ + r = a COND b ? x : y; \ + return r; \ +} + +#define DEF_VCOND_IMM(TYPE, COND, IMM, SUFFIX) \ +TYPE vcond_imm_##TYPE##_##SUFFIX (TYPE x, TYPE y, TYPE a) \ +{ \ + TYPE r; \ + r = a COND IMM ? x : y; \ + return r; \ +} + +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ + T (v32qi, COND, SUFFIX) \ + T (v16hi, COND, SUFFIX) \ + T (v8si, COND, SUFFIX) \ + T (v4di, COND, SUFFIX) + +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ + T (v32qu, COND, SUFFIX) \ + T (v16hu, COND, SUFFIX) \ + T (v8su, COND, SUFFIX) \ + T (v4du, COND, SUFFIX) + +#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ + TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ + TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) + +#define TEST_VAR_ALL(T) \ + TEST_COND_VAR_ALL (T, >, gt) \ + TEST_COND_VAR_ALL (T, <, lt) \ + TEST_COND_VAR_ALL (T, >=, ge) \ + TEST_COND_VAR_ALL (T, <=, le) \ + TEST_COND_VAR_ALL (T, ==, eq) \ + TEST_COND_VAR_ALL (T, !=, ne) + +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (v32qi, COND, IMM, SUFFIX) \ + T (v16hi, COND, IMM, SUFFIX) \ + T (v8si, COND, IMM, SUFFIX) \ + T (v4di, COND, IMM, SUFFIX) + +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (v32qu, COND, IMM, SUFFIX) \ + T (v16hu, COND, IMM, SUFFIX) \ + T (v8su, COND, IMM, SUFFIX) \ + T (v4du, COND, IMM, SUFFIX) + +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX) + +#define TEST_IMM_ALL(T) \ + /* Expect immediates to make it into the encoding. 
*/ \ + TEST_COND_IMM_ALL (T, >, 5, gt) \ + TEST_COND_IMM_ALL (T, <, 5, lt) \ + TEST_COND_IMM_ALL (T, >=, 5, ge) \ + TEST_COND_IMM_ALL (T, <=, 5, le) \ + TEST_COND_IMM_ALL (T, ==, 5, eq) \ + TEST_COND_IMM_ALL (T, !=, 5, ne) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 15, gt2) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 15, lt2) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 15, ge2) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 15, le2) \ + TEST_COND_IMM_SIGNED_ALL (T, ==, 15, eq2) \ + TEST_COND_IMM_SIGNED_ALL (T, !=, 15, ne2) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -16, gt3) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -16, lt3) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -16, ge3) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -16, le3) \ + TEST_COND_IMM_SIGNED_ALL (T, ==, -16, eq3) \ + TEST_COND_IMM_SIGNED_ALL (T, !=, -16, ne3) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, gt4) \ + /* Testing if an unsigned value >= 0 or < 0 is pointless as it will \ + get folded away by the compiler. */ \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, le4) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 31, gt5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 31, lt5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 31, ge5) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 31, le5) \ + \ + /* Expect immediates to NOT make it into the encoding, and instead be \ + forced into a register. 
*/ \ + TEST_COND_IMM_ALL (T, >, 32, gt6) \ + TEST_COND_IMM_ALL (T, <, 32, lt6) \ + TEST_COND_IMM_ALL (T, >=, 32, ge6) \ + TEST_COND_IMM_ALL (T, <=, 32, le6) \ + TEST_COND_IMM_ALL (T, ==, 32, eq6) \ + TEST_COND_IMM_ALL (T, !=, 32, ne6) + +TEST_VAR_ALL (DEF_VCOND_VAR) +TEST_IMM_ALL (DEF_VCOND_IMM) + +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, 
z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} } } */ + + + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { 
dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} } } */ + +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { 
scan-assembler {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} } } */ +/* { dg-final { scan-assembler {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} } } */ + + + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ + +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} } } */ +/* { dg-final { scan-assembler 
{\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} } } */ + + +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ + +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #31\n} } } */ +/* { dg-final { scan-assembler {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #31\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C new file mode 100644 index 00000000000..42e09d94393 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_1_run.C @@ -0,0 +1,46 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vcond_1.C" + +#define NUM_ELEMS(X) (sizeof 
(X) / sizeof (X[0])) + +#define TEST_VCOND_VAR(TYPE, COND, SUFFIX) \ +{ \ + TYPE x, y, a, b; \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + { \ + a[i] = i - 2; \ + b[i] = NUM_ELEMS (x) - 2 - i; \ + x[i] = i * 2; \ + y[i] = -i * 3; \ + } \ + TYPE r = vcond_##TYPE##_##SUFFIX (x, y, a, b); \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + if (r[i] != (a[i] COND b[i] ? x[i] : y[i])) \ + __builtin_abort (); \ +} + +#define TEST_VCOND_IMM(TYPE, COND, IMM, SUFFIX) \ +{ \ + TYPE x, y, a; \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + { \ + a[i] = IMM - 2 + i; \ + x[i] = i * 2; \ + y[i] = -i * 3; \ + } \ + TYPE r = vcond_imm_##TYPE##_##SUFFIX (x, y, a); \ + for (int i = 0; i < NUM_ELEMS (x); ++i) \ + if (r[i] != (a[i] COND IMM ? x[i] : y[i])) \ + __builtin_abort (); \ +} + + +int main (int argc, char **argv) +{ + TEST_VAR_ALL (TEST_VCOND_VAR) + TEST_IMM_ALL (TEST_VCOND_IMM) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c new file mode 100644 index 00000000000..0c67f8147c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2.c @@ -0,0 +1,318 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ + void __attribute__ ((noinline, noclone)) \ + vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ + DATA_TYPE *__restrict__ x, \ + DATA_TYPE *__restrict__ y, \ + CMP_TYPE *__restrict__ a, \ + CMP_TYPE *__restrict__ b, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + DATA_TYPE xval = x[i], yval = y[i]; \ + CMP_TYPE aval = a[i], bval = b[i]; \ + r[i] = aval COND bval ? 
xval : yval; \ + } \ + } + +#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ + void __attribute__ ((noinline, noclone)) \ + vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \ + DATA_TYPE *__restrict__ x, \ + DATA_TYPE *__restrict__ y, \ + CMP_TYPE *__restrict__ a, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + DATA_TYPE xval = x[i], yval = y[i]; \ + CMP_TYPE aval = a[i]; \ + r[i] = aval COND (CMP_TYPE) IMM ? xval : yval; \ + } \ + } + +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \ + T (int8_t, int8_t, COND, SUFFIX) \ + T (int16_t, int16_t, COND, SUFFIX) \ + T (int32_t, int32_t, COND, SUFFIX) \ + T (int64_t, int64_t, COND, SUFFIX) \ + T (_Float16, int16_t, COND, SUFFIX##_float16) \ + T (float, int32_t, COND, SUFFIX##_float) \ + T (double, int64_t, COND, SUFFIX##_double) + +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \ + T (uint8_t, uint8_t, COND, SUFFIX) \ + T (uint16_t, uint16_t, COND, SUFFIX) \ + T (uint32_t, uint32_t, COND, SUFFIX) \ + T (uint64_t, uint64_t, COND, SUFFIX) \ + T (_Float16, uint16_t, COND, SUFFIX##_float16) \ + T (float, uint32_t, COND, SUFFIX##_float) \ + T (double, uint64_t, COND, SUFFIX##_double) + +#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \ + TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \ + TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX) + +#define TEST_VAR_ALL(T) \ + TEST_COND_VAR_ALL (T, >, _gt) \ + TEST_COND_VAR_ALL (T, <, _lt) \ + TEST_COND_VAR_ALL (T, >=, _ge) \ + TEST_COND_VAR_ALL (T, <=, _le) \ + TEST_COND_VAR_ALL (T, ==, _eq) \ + TEST_COND_VAR_ALL (T, !=, _ne) + +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \ + T (int8_t, int8_t, COND, IMM, SUFFIX) \ + T (int16_t, int16_t, COND, IMM, SUFFIX) \ + T (int32_t, int32_t, COND, IMM, SUFFIX) \ + T (int64_t, int64_t, COND, IMM, SUFFIX) \ + T (_Float16, int16_t, COND, IMM, SUFFIX##_float16) \ + T (float, int32_t, COND, IMM, SUFFIX##_float) \ + T (double, int64_t, COND, IMM, SUFFIX##_double) + +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, 
IMM, SUFFIX) \ + T (uint8_t, uint8_t, COND, IMM, SUFFIX) \ + T (uint16_t, uint16_t, COND, IMM, SUFFIX) \ + T (uint32_t, uint32_t, COND, IMM, SUFFIX) \ + T (uint64_t, uint64_t, COND, IMM, SUFFIX) \ + T (_Float16, uint16_t, COND, IMM, SUFFIX##_float16) \ + T (float, uint32_t, COND, IMM, SUFFIX##_float) \ + T (double, uint64_t, COND, IMM, SUFFIX##_double) + +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX) \ + TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX) + +#define TEST_IMM_ALL(T) \ + /* Expect immediates to make it into the encoding. */ \ + TEST_COND_IMM_ALL (T, >, 5, _gt) \ + TEST_COND_IMM_ALL (T, <, 5, _lt) \ + TEST_COND_IMM_ALL (T, >=, 5, _ge) \ + TEST_COND_IMM_ALL (T, <=, 5, _le) \ + TEST_COND_IMM_ALL (T, ==, 5, _eq) \ + TEST_COND_IMM_ALL (T, !=, 5, _ne) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2) \ + TEST_COND_IMM_ALL (T, ==, 15, _eq2) \ + TEST_COND_IMM_ALL (T, !=, 15, _ne2) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3) \ + TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3) \ + TEST_COND_IMM_ALL (T, ==, 16, _eq3) \ + TEST_COND_IMM_ALL (T, !=, 16, _ne3) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4) \ + TEST_COND_IMM_ALL (T, ==, -16, _eq4) \ + TEST_COND_IMM_ALL (T, !=, -16, _ne4) \ + \ + TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5) \ + TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5) \ + TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5) \ + TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5) \ + TEST_COND_IMM_ALL (T, ==, -17, _eq5) \ + TEST_COND_IMM_ALL (T, !=, -17, _ne5) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6) \ + /* Testing if an unsigned 
value >= 0 or < 0 is pointless as it will \ + get folded away by the compiler. */ \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6) \ + \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7) \ + \ + /* Expect immediates to NOT make it into the encoding, and instead be \ + forced into a register. */ \ + TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8) \ + TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8) + +TEST_VAR_ALL (DEF_VCOND_VAR) +TEST_IMM_ALL (DEF_VCOND_IMM) + +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-7], z[0-9]+\.b, z[0-9]+\.b\n} 66 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 132 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 132 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 132 } } */ + +/* There are two signed ordered register comparisons for .b, one for a + variable comparison and one for one of the two out-of-range constant + comparisons. The other out-of-range constant comparison can be + adjusted to an in-range value by inverting the handling of equality. + + The same pattern appears twice for .h, .s and .d, once for integer data + and once for floating-point data. 
*/ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { 
dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ + +/* Out-of-range >= is converted to in-range >. */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphs\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* Out-of-range < is converted to in-range <=. */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplo\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +/* 6 for .b: {signed, unsigned} x {variable, too high, too low}. */ +/* 12 for .h,.s and .d: the above 6 repeated for integer and floating-point + data. 
*/ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ + +/* Also used for >= 16. */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* gcc converts "a < 15" into "a <= 14". */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ + +/* gcc converts "a >= 15" into "a > 14". 
*/ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #14\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #14\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpgt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #14\n} 2 } } */ + +/* Also used for < 16. */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* Appears once for each signedness. */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #15\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, 
z[0-9]+\.d, #-16\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* gcc converts "a > -16" into "a >= -15". */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ + +/* Also used for <= -17. */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* Also used for > -17. */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-16\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-16\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpge\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-16\n} 4 } } */ + +/* gcc converts "a <= -16" into "a < -15". 
*/ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #-15\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #-15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmplt\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #-15\n} 2 } } */ + +/* gcc converts "a > 0" into "a != 0". */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpne\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ + +/* gcc converts "a <= 0" into "a == 0". */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #0\n} 2 } } */ + +/* Also used for >= 128. */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ + +/* gcc converts "a < 127" into "a <= 126". 
*/ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ + +/* gcc converts "a >= 127" into "a > 126". */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #126\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #126\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tcmphi\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #126\n} 2 } } */ + +/* Also used for < 128. */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #127\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #127\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #127\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c new file mode 100644 index 00000000000..4cdb5bb9e43 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_2_run.c @@ -0,0 +1,49 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vcond_2.c" + +#define N 97 + +#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \ +{ \ + DATA_TYPE x[N], y[N], r[N]; \ + CMP_TYPE a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + x[i] = i; \ + y[i] = (i & 1) + 5; \ + a[i] = i - N / 3; \ + b[i] = N - N / 3 - i; \ + asm volatile ("" ::: "memory"); \ + } \ + 
vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (a[i] COND b[i] ? x[i] : y[i])) \ + __builtin_abort (); \ +} + +#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \ +{ \ + DATA_TYPE x[N], y[N], r[N]; \ + CMP_TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + x[i] = i; \ + y[i] = (i & 1) + 5; \ + a[i] = IMM - N / 3 + i; \ + asm volatile ("" ::: "memory"); \ + } \ + vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i])) \ + __builtin_abort (); \ +} + +int __attribute__ ((optimize (1))) +main (int argc, char **argv) +{ + TEST_VAR_ALL (TEST_VCOND_VAR) + TEST_IMM_ALL (TEST_VCOND_IMM) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.c new file mode 100644 index 00000000000..9750bd07fda --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_3.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_SEL_IMM(TYPE, SUFFIX, IMM) \ +void \ +sel_##TYPE##_##SUFFIX (TYPE *restrict a, TYPE *restrict b, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + a[i] = b[i] != 0 ? IMM : 0; \ +} + +#define DEF_SEL_VAR(TYPE) \ +void \ +sel_##TYPE##_var (TYPE *restrict a, TYPE *restrict b, TYPE val, int n) \ +{ \ + for (int i = 0; i < n; i++) \ + a[i] = b[i] != 0 ? 
val : 0; \ +} + +#define TEST_TYPE8(TYPE) \ + DEF_SEL_VAR (TYPE) \ + DEF_SEL_IMM (TYPE, m128, -128) \ + DEF_SEL_IMM (TYPE, m127, -127) \ + DEF_SEL_IMM (TYPE, 2, 2) \ + DEF_SEL_IMM (TYPE, 127, 127) + +#define TEST_TYPE16(TYPE) \ + TEST_TYPE8 (TYPE) \ + DEF_SEL_IMM (TYPE, m32768, -32768) \ + DEF_SEL_IMM (TYPE, m32767, -32767) \ + DEF_SEL_IMM (TYPE, m32512, -32512) \ + DEF_SEL_IMM (TYPE, m32511, -32511) \ + DEF_SEL_IMM (TYPE, m256, -256) \ + DEF_SEL_IMM (TYPE, m255, -255) \ + DEF_SEL_IMM (TYPE, m129, -129) \ + DEF_SEL_IMM (TYPE, 128, 128) \ + DEF_SEL_IMM (TYPE, 256, 256) \ + DEF_SEL_IMM (TYPE, 32511, 32511) \ + DEF_SEL_IMM (TYPE, 32512, 32512) \ + DEF_SEL_IMM (TYPE, 32767, 32767) + +#define TEST_TYPE32(TYPE) \ + TEST_TYPE16 (TYPE) \ + DEF_SEL_IMM (TYPE, m65536, -65536) \ + DEF_SEL_IMM (TYPE, m32769, -32769) \ + DEF_SEL_IMM (TYPE, 32768, 32768) + +TEST_TYPE8 (int8_t) +TEST_TYPE16 (int16_t) +TEST_TYPE32 (int32_t) +TEST_TYPE32 (int64_t) + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-128\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #-127\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, p[0-7]/z, #127\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32768\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-32512\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-256\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-128\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #-127\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #2\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #127\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #256\n} 
3 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.[hsd], p[0-7]/z, #32512\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4.c new file mode 100644 index 00000000000..a46800ce9a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4.c @@ -0,0 +1,139 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define eq(A, B) ((A) == (B)) +#define ne(A, B) ((A) != (B)) +#define olt(A, B) ((A) < (B)) +#define ole(A, B) ((A) <= (B)) +#define oge(A, B) ((A) >= (B)) +#define ogt(A, B) ((A) > (B)) +#define ordered(A, B) (!__builtin_isunordered (A, B)) +#define unordered(A, B) (__builtin_isunordered (A, B)) +#define ueq(A, B) (!__builtin_islessgreater (A, B)) +#define ult(A, B) (__builtin_isless (A, B)) +#define ule(A, B) (__builtin_islessequal (A, B)) +#define uge(A, B) (__builtin_isgreaterequal (A, B)) +#define ugt(A, B) (__builtin_isgreater (A, B)) +#define nueq(A, B) (__builtin_islessgreater (A, B)) +#define nult(A, B) (!__builtin_isless (A, B)) +#define nule(A, B) (!__builtin_islessequal (A, B)) +#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) +#define nugt(A, B) (!__builtin_isgreater (A, B)) + +#define TEST_LOOP(TYPE1, TYPE2, CMP) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \ + TYPE1 *restrict src, \ + TYPE1 fallback, \ + TYPE2 *restrict a, \ + TYPE2 *restrict b, \ + int count) \ + { \ + for (int i = 0; i < count; ++i) \ + dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \ + } \ + \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \ + TYPE1 *restrict src, \ + TYPE1 fallback, \ + TYPE2 *restrict a, \ + int count) \ + { \ + for (int i = 0; i < count; ++i) \ + dest[i] = CMP (a[i], 0) ? 
src[i] : fallback; \ + } \ + \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest, \ + TYPE1 if_true, \ + TYPE1 if_false, \ + TYPE2 *restrict a, \ + TYPE2 b, int count) \ + { \ + for (int i = 0; i < count; ++i) \ + dest[i] = CMP (a[i], b) ? if_true : if_false; \ + } + +#define TEST_CMP(CMP) \ + TEST_LOOP (int32_t, float, CMP) \ + TEST_LOOP (uint32_t, float, CMP) \ + TEST_LOOP (int64_t, float, CMP) \ + TEST_LOOP (uint64_t, float, CMP) \ + TEST_LOOP (float, float, CMP) \ + TEST_LOOP (int32_t, double, CMP) \ + TEST_LOOP (uint32_t, double, CMP) \ + TEST_LOOP (int64_t, double, CMP) \ + TEST_LOOP (uint64_t, double, CMP) \ + TEST_LOOP (double, double, CMP) + +TEST_CMP (eq) +TEST_CMP (ne) +TEST_CMP (olt) +TEST_CMP (ole) +TEST_CMP (oge) +TEST_CMP (ogt) +TEST_CMP (ordered) +TEST_CMP (unordered) +TEST_CMP (ueq) +TEST_CMP (ult) +TEST_CMP (ule) +TEST_CMP (uge) +TEST_CMP (ugt) +TEST_CMP (nueq) +TEST_CMP (nult) +TEST_CMP (nule) +TEST_CMP (nuge) +TEST_CMP (nugt) + +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 5 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 10 { xfail *-*-* } } } */ + +/* 5 for ne, 5 for ueq and 5 for nueq. */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ + +/* 5 for lt, 5 for ult and 5 for nult. */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + +/* 5 for le, 5 for ule and 5 for nule. 
*/ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + +/* 5 for gt, 5 for ugt and 5 for nugt. */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + +/* 5 for ge, 5 for uge and 5 for nuge. */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + +/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */ +/* 3 loops * 5 invocations for all 12 unordered comparisons. */ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */ + +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ +/* { 
dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */ +/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2, + for all 12 unordered comparisons. */ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c new file mode 100644 index 00000000000..e8d06bb9f17 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_4_run.c @@ -0,0 +1,88 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ +/* { dg-require-effective-target fenv_exceptions } */ + +#ifndef TEST_EXCEPTIONS +#define TEST_EXCEPTIONS 1 +#endif + +#include <fenv.h> + +#include "sve_vcond_4.c" + +#define N 401 + +#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID) \ + { \ + TYPE1 dest1[N], dest2[N], dest3[N], src[N]; \ + TYPE2 a[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + src[i] = i * i; \ + if (i % 5 == 0) \ + a[i] = 0; \ + else if (i % 3) \ + a[i] = i * 0.1; \ + else \ + a[i] = i; \ + if (i % 7 == 0) \ + b[i] = __builtin_nan (""); \ + else if (i % 6) \ + b[i] = i * 0.1; \ + else \ + b[i] = i; \ + asm volatile ("" ::: "memory"); \ + } \ + feclearexcept (FE_ALL_EXCEPT); \ + test_##TYPE1##_##TYPE2##_##CMP##_var (dest1, src, 11, a, b, N); \ + test_##TYPE1##_##TYPE2##_##CMP##_zero (dest2, src, 22, a, N); \ + test_##TYPE1##_##TYPE2##_##CMP##_sel (dest3, 33, 44, a, 9, N); \ + if (TEST_EXCEPTIONS \ + && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \ + __builtin_abort (); \ + for (int i = 0; i < N; ++i) \ + { \ + if 
(dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \ + __builtin_abort (); \ + if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \ + __builtin_abort (); \ + if (dest3[i] != (CMP (a[i], 9) ? 33 : 44)) \ + __builtin_abort (); \ + } \ + } + +#define RUN_CMP(CMP, EXPECT_INVALID) \ + RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \ + RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \ + RUN_LOOP (int64_t, float, CMP, EXPECT_INVALID) \ + RUN_LOOP (uint64_t, float, CMP, EXPECT_INVALID) \ + RUN_LOOP (float, float, CMP, EXPECT_INVALID) \ + RUN_LOOP (int32_t, double, CMP, EXPECT_INVALID) \ + RUN_LOOP (uint32_t, double, CMP, EXPECT_INVALID) \ + RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \ + RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \ + RUN_LOOP (double, double, CMP, EXPECT_INVALID) + +int __attribute__ ((optimize (1))) +main (void) +{ + RUN_CMP (eq, 0) + RUN_CMP (ne, 0) + RUN_CMP (olt, 1) + RUN_CMP (ole, 1) + RUN_CMP (oge, 1) + RUN_CMP (ogt, 1) + RUN_CMP (ordered, 0) + RUN_CMP (unordered, 0) + RUN_CMP (ueq, 0) + RUN_CMP (ult, 0) + RUN_CMP (ule, 0) + RUN_CMP (uge, 0) + RUN_CMP (ugt, 0) + RUN_CMP (nueq, 0) + RUN_CMP (nult, 0) + RUN_CMP (nule, 0) + RUN_CMP (nuge, 0) + RUN_CMP (nugt, 0) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_5.c new file mode 100644 index 00000000000..1b978bdac99 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_5.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -fno-trapping-math" } */ + +/* The difference here is that nueq can use LTGT. */ + +#include "sve_vcond_4.c" + +/* 5 for eq and 5 for ueq. 
*/ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 10 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 20 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 5 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 10 { xfail *-*-* } } } */ + +/* 5 for lt, 5 for ult, 5 for nueq and 5 for nult. */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */ + +/* 5 for le, 5 for ule and 5 for nule. */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */ + +/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */ + +/* 5 for ge, 5 for uge and 5 for nuge. */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */ + +/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */ +/* 3 loops * 5 invocations for ordered, unordered and ueq. 
*/ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 45 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 14 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 28 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 7 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 14 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */ +/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */ + +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */ + +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */ +/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */ + +/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */ +/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2, + for ordered, unordered and ueq. 
*/ +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 63 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_5_run.c new file mode 100644 index 00000000000..efb92be1947 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_5_run.c @@ -0,0 +1,6 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -fno-trapping-math" } */ +/* { dg-require-effective-target fenv_exceptions } */ + +#define TEST_EXCEPTIONS 0 +#include "sve_vcond_4_run.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c new file mode 100644 index 00000000000..74336050d8d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define b_and(A, B) ((A) & (B)) +#define b_orr(A, B) ((A) | (B)) +#define b_eor(A, B) ((A) ^ (B)) +#define b_nand(A, B) (!((A) & (B))) +#define b_nor(A, B) (!((A) | (B))) +#define b_bic(A, B) ((A) & !(B)) +#define b_orn(A, B) ((A) | !(B)) + +#define LOOP(TYPE, BINOP) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE##_##BINOP (TYPE *restrict dest, TYPE *restrict src, \ + TYPE *restrict a, TYPE *restrict b, \ + TYPE *restrict c, TYPE *restrict d, \ + TYPE fallback, int count) \ + { \ + for (int i = 0; i < count; ++i) \ + dest[i] = (BINOP (__builtin_isunordered (a[i], b[i]), \ + __builtin_isunordered (c[i], d[i])) \ + ? 
src[i] : fallback); \ + } + +#define TEST_BINOP(T, BINOP) \ + T (_Float16, BINOP) \ + T (float, BINOP) \ + T (double, BINOP) + +#define TEST_ALL(T) \ + TEST_BINOP (T, b_and) \ + TEST_BINOP (T, b_orr) \ + TEST_BINOP (T, b_eor) \ + TEST_BINOP (T, b_nand) \ + TEST_BINOP (T, b_nor) \ + TEST_BINOP (T, b_bic) \ + TEST_BINOP (T, b_orn) + +TEST_ALL (LOOP) + +/* Currently we don't manage to remove ANDs from the other loops. */ +/* { dg-final { scan-assembler-times {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler {\tand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} } } */ +/* { dg-final { scan-assembler-times {\torr\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\teor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\tnand\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ +/* { dg-final { scan-assembler-times {\torn\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c new file mode 100644 index 00000000000..edad9b8272d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_6_run.c @@ -0,0 +1,35 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vcond_6.c" + +#define N 401 + +#define RUN_LOOP(TYPE, BINOP) \ + { \ + TYPE dest[N], src[N], a[N], b[N], c[N], d[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + src[i] = i * i; \ + a[i] = i % 5 < 3 ? __builtin_nan("") : i; \ + b[i] = i % 7 < 4 ? __builtin_nan("") : i; \ + c[i] = i % 9 < 5 ? __builtin_nan("") : i; \ + d[i] = i % 11 < 6 ? 
__builtin_nan("") : i; \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE##_##BINOP (dest, src, a, b, c, d, 100, N); \ + for (int i = 0; i < N; ++i) \ + { \ + int res = BINOP (__builtin_isunordered (a[i], b[i]), \ + __builtin_isunordered (c[i], d[i])); \ + if (dest[i] != (res ? src[i] : 100.0)) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (RUN_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_7.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_7.c new file mode 100644 index 00000000000..287b67e83cc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_7.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE (TYPE *a, TYPE a1, TYPE a2, TYPE a3, TYPE a4, int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + a[i] = a[i] >= 1 && a[i] != 3 ? a1 : a2; \ + a[i + 1] = a[i + 1] >= 1 && a[i + 1] != 3 ? 
a3 : a4; \ + } \ + } + +#define TEST_ALL \ + TEST (int8_t) \ + TEST (uint8_t) \ + TEST (int16_t) \ + TEST (uint16_t) \ + TEST (int32_t) \ + TEST (uint32_t) \ + TEST (int64_t) \ + TEST (uint64_t) \ + TEST (float) \ + TEST (double) + +TEST_ALL + +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 10 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_7_run.c new file mode 100644 index 00000000000..f5e4e6ae773 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_7_run.c @@ -0,0 +1,25 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vcond_7.c" + +#define N 133 + +#undef TEST +#define TEST(TYPE) \ + { \ + TYPE a[N]; \ + for (int i = 0; i < N; ++i) \ + a[i] = i % 7; \ + test_##TYPE (a, 10, 11, 12, 13, N); \ + for (int i = 0; i < N; ++i) \ + if (a[i] != 10 + (i & 1) * 2 + (i % 7 == 0 || i % 7 == 3)) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_8.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_8.c new file mode 100644 index 00000000000..07793970ef6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_8.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define TEST(TYPE) \ + void __attribute__ ((noinline, noclone)) \ + test_##TYPE (int *restrict a, TYPE *restrict b, int a1, int a2, \ + int a3, int a4, int n) \ + { \ + for (int i = 0; i < n; i += 2) \ + { \ + a[i] = a[i] >= 1 & b[i] != 3 ? a1 : a2; \ + a[i + 1] = a[i + 1] >= 1 & b[i + 1] != 3 ? 
a3 : a4; \ + } \ + } + +#define TEST_ALL \ + TEST (int8_t) \ + TEST (uint8_t) \ + TEST (int16_t) \ + TEST (uint16_t) \ + TEST (int64_t) \ + TEST (uint64_t) \ + TEST (double) + +TEST_ALL + +/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\t} 15 } } */ +/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */ +/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vcond_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vcond_8_run.c new file mode 100644 index 00000000000..66b0258419c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vcond_8_run.c @@ -0,0 +1,29 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vcond_8.c" + +#define N 133 + +#undef TEST +#define TEST(TYPE) \ + { \ + int a[N]; \ + TYPE b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 5; \ + b[i] = i % 7; \ + } \ + test_##TYPE (a, b, 10, 11, 12, 13, N); \ + for (int i = 0; i < N; ++i) \ + if (a[i] != 10 + (i & 1) * 2 + (i % 5 == 0 || i % 7 == 3)) \ + __builtin_abort (); \ + } + +int +main (void) +{ + TEST_ALL + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c new file mode 100644 index 00000000000..3b7c3e75775 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> +#include <stdbool.h> + +#define VEC_BOOL_CMPNE(VARTYPE, INDUCTYPE) \ +void \ +vec_bool_cmpne##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \ + INDUCTYPE start, INDUCTYPE n, \ + INDUCTYPE mask) \ +{ \ + INDUCTYPE i; \ + for (i = 0; i < n; i++) \ + { \ + bool lhs = i >= start; \ + bool rhs = (i & mask) != 0x3D; \ + if (lhs != rhs) \ + 
dst[i] = src[i]; \ + } \ +} + +#define VEC_BOOL_CMPEQ(VARTYPE, INDUCTYPE) \ +void \ +vec_bool_cmpeq##VARTYPE##INDUCTYPE (VARTYPE *dst, VARTYPE *src, \ + INDUCTYPE start, INDUCTYPE n, \ + INDUCTYPE mask) \ +{ \ + INDUCTYPE i; \ + for (i = 0; i < n; i++) \ + { \ + bool lhs = i >= start; \ + bool rhs = (i & mask) != 0x3D; \ + if (lhs == rhs) \ + dst[i] = src[i]; \ + } \ +} + +VEC_BOOL_CMPNE (uint8_t, uint8_t) +VEC_BOOL_CMPNE (uint16_t, uint16_t) +VEC_BOOL_CMPNE (uint32_t, uint32_t) +VEC_BOOL_CMPNE (uint64_t, uint64_t) +VEC_BOOL_CMPNE (float, uint32_t) +VEC_BOOL_CMPNE (double, uint64_t) + +VEC_BOOL_CMPEQ (uint8_t, uint8_t) +VEC_BOOL_CMPEQ (uint16_t, uint16_t) +VEC_BOOL_CMPEQ (uint32_t, uint32_t) +VEC_BOOL_CMPEQ (uint64_t, uint64_t) +VEC_BOOL_CMPEQ (float, uint32_t) +VEC_BOOL_CMPEQ (double, uint64_t) + +/* Both CMPNE and CMPEQ loops will contain an exclusive predicate or. */ +/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */ +/* CMPEQ will also contain a masked predicate not operation, which gets + folded to BIC. 
*/ +/* { dg-final { scan-assembler-times {\tbic\tp[0-9]+\.b, p[0-7]/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c new file mode 100644 index 00000000000..8c341c0e932 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c @@ -0,0 +1,72 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */ + +#include "sve_vec_bool_cmp_1.c" + +extern void abort (void); + +#define N 103 + +#define TEST_VEC_BOOL_CMPNE(VARTYPE,INDUCTYPE) \ +{ \ + INDUCTYPE i; \ + VARTYPE src[N]; \ + VARTYPE dst[N]; \ + for (i = 0; i < N; i++) \ + { \ + src[i] = i; \ + dst[i] = i * 2; \ + } \ + vec_bool_cmpne##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \ + for (i = 0; i < 13; i++) \ + if (dst[i] != i) \ + abort (); \ + for (i = 13; i < N; i++) \ + if (i != 0x3D && dst[i] != (i * 2)) \ + abort (); \ + else if (i == 0x3D && dst[i] != 0x3D) \ + abort (); \ +} + +#define TEST_VEC_BOOL_CMPEQ(VARTYPE,INDUCTYPE) \ +{ \ + INDUCTYPE i; \ + VARTYPE src[N]; \ + VARTYPE dst[N]; \ + for (i = 0; i < N; i++) \ + { \ + src[i] = i; \ + dst[i] = i * 2; \ + } \ + vec_bool_cmpeq##VARTYPE##INDUCTYPE (dst, src, 13, 97, 0xFF); \ + for (i = 0; i < 13; i++) \ + if (dst[i] != (i * 2)) \ + abort (); \ + for (i = 13; i < 97; i++) \ + if (i != 0x3D && dst[i] != i) \ + abort (); \ + else if (i == 0x3D && dst[i] != (0x3D) * 2) \ + abort (); \ + for (i = 97; i < N; i++) \ + if (dst[i] != (i * 2)) \ + abort (); \ +} + +int main () +{ + TEST_VEC_BOOL_CMPNE (uint8_t, uint8_t); + TEST_VEC_BOOL_CMPNE (uint16_t, uint16_t); + TEST_VEC_BOOL_CMPNE (uint32_t, uint32_t); + TEST_VEC_BOOL_CMPNE (uint64_t, uint64_t); + TEST_VEC_BOOL_CMPNE (float, uint32_t); + TEST_VEC_BOOL_CMPNE (double, uint64_t); + + TEST_VEC_BOOL_CMPEQ (uint8_t, uint8_t); + TEST_VEC_BOOL_CMPEQ (uint16_t, uint16_t); + TEST_VEC_BOOL_CMPEQ (uint32_t, uint32_t); + 
TEST_VEC_BOOL_CMPEQ (uint64_t, uint64_t); + TEST_VEC_BOOL_CMPEQ (float, uint32_t); + TEST_VEC_BOOL_CMPEQ (double, uint64_t); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.c new file mode 100644 index 00000000000..95f19f7f786 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define NUM_ELEMS(TYPE) (128 / sizeof (TYPE)) + +#define DUP_FN(TYPE) \ +void __attribute__ ((noinline, noclone)) \ +dup_##TYPE (TYPE *r, TYPE v) \ +{ \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + r[i] = v; \ +} + +DUP_FN (int8_t) +DUP_FN (int16_t) +DUP_FN (int32_t) +DUP_FN (int64_t) +DUP_FN (_Float16) +DUP_FN (float) +DUP_FN (double) + +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.b, w[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, w[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, h[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, s[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, d[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.c new file mode 100644 index 00000000000..ba7eb44be70 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_1_run.c @@ -0,0 +1,28 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vec_init_1.c" + +#define TEST_INIT_VECTOR(TYPE, VAL) \ + { \ + TYPE r[NUM_ELEMS (TYPE)]; \ + dup_##TYPE (r, VAL); \ + for (int i = 0; i < NUM_ELEMS (TYPE); i++) \ + if (r[i] != VAL) \ + __builtin_abort 
(); \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_INIT_VECTOR (int8_t, 0x2a); + TEST_INIT_VECTOR (int16_t, 0x3976); + TEST_INIT_VECTOR (int32_t, 0x31232976); + TEST_INIT_VECTOR (int64_t, 0x9489363731232976LL); + + TEST_INIT_VECTOR (_Float16, -0x1.fp10); + TEST_INIT_VECTOR (float, -0x1.fe02p10); + TEST_INIT_VECTOR (double, 0x1.fe02eeeee1p10); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c new file mode 100644 index 00000000000..3d5b584e9e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_init_2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=256" } */ + +typedef unsigned int v8si __attribute__ ((vector_size(32))); + +void +f (v8si *ptr, int x) +{ + *ptr += (v8si) { x, x, 1, 2, 3, x, x, 4 }; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c new file mode 100644 index 00000000000..ae8542f2c75 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define VEC_PERM(TYPE, MASKTYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_perm_##TYPE (TYPE values1, TYPE values2, MASKTYPE mask) \ +{ \ + return __builtin_shuffle (values1, values2, mask); \ +} + +VEC_PERM (v4di, v4di); +VEC_PERM (v8si, v8si); +VEC_PERM (v16hi, v16hi); +VEC_PERM (v32qi, v32qi); +VEC_PERM (v4df, v4di); 
+VEC_PERM (v8sf, v8si); +VEC_PERM (v16hf, v16hi); + +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c new file mode 100644 index 00000000000..6ab82250d4c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_overrange_run.c @@ -0,0 +1,111 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vec_perm_1.c" + +#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, \ + VALUES1, VALUES2, MASK) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + MASK_TYPE mask = MASK; \ + TYPE dest; \ + dest = vec_perm_##TYPE (values1, values2, mask); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_VEC_PERM (v4di, v4di, + ((v4di) { 5, 36, 7, 48 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 }), + ((v4di) { 1 + (8 * 1), 6 + (8 * 3), + 3 + (8 * 1), 7 + (8 * 5) })); + TEST_VEC_PERM (v8si, v8si, + ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }), + ((v8si) { 9 + (16 * 2), 13 + (16 * 5), + 15 + (16 * 1), 7 + (16 * 0), + 6 + (16 * 8), 5 + (16 * 2), + 4 + (16 * 3), 10 + (16 * 2) })); + TEST_VEC_PERM (v16hi, v16hi, + ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34, + 7, 48, 3, 35, 9, 8, 7, 13 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 
15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 }), + ((v16hi) { 9 + (32 * 2), 13 + (32 * 2), + 15 + (32 * 8), 7 + (32 * 9), + 25 + (32 * 4), 26 + (32 * 3), + 27 + (32 * 1), 17 + (32 * 2), + 4 + (32 * 6), 31 + (32 * 7), + 0 + (32 * 8), 18 + (32 * 9), + 6 + (32 * 6), 5 + (32 * 7), + 4 + (32 * 2), 10 + (32 * 2) })); + TEST_VEC_PERM (v32qi, v32qi, + ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 }), + ((v32qi) { 5 + (64 * 3), 6 + (64 * 1), + 7 + (64 * 2), 8 + (64 * 1), + 9 + (64 * 3), 10 + (64 * 1), + 28 + (64 * 3), 29 + (64 * 3), + 30 + (64 * 1), 31 + (64 * 1), + 32 + (64 * 3), 33 + (64 * 2), + 54 + (64 * 2), 55 + (64 * 2), + 56 + (64 * 1), 61 + (64 * 2), + 5 + (64 * 2), 6 + (64 * 1), + 7 + (64 * 2), 8 + (64 * 2), + 9 + (64 * 2), 10 + (64 * 1), + 28 + (64 * 3), 29 + (64 * 1), + 30 + (64 * 3), 31 + (64 * 3), + 32 + (64 * 1), 33 + (64 * 1), + 54 + (64 * 2), 55 + (64 * 2), + 56 + (64 * 2), 61 + (64 * 2) })); + TEST_VEC_PERM (v4df, v4di, + ((v4df) { 5.1, 36.1, 7.1, 48.1 }), + ((v4df) { 4.1, 5.1, 6.1, 7.1 }), + ((v4df) { 12.1, 24.1, 36.1, 48.1 }), + ((v4di) { 1 + (8 * 3), 6 + (8 * 10), + 3 + (8 * 8), 7 + (8 * 2) })); + TEST_VEC_PERM (v8sf, v8si, + ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), + ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((v8sf) { 33.2, 34.2, 35.2, 36.2, + 37.2, 38.2, 39.2, 40.2 }), + ((v8si) { 9 + (16 * 1), 13 + (16 * 5), + 15 + (16 * 4), 7 + (16 * 4), + 6 + (16 * 3), 5 + (16 * 2), + 4 + (16 * 1), 10 + (16 * 0) })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 
13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((v16hi) { 9 + (32 * 2), 13 + (32 * 2), + 15 + (32 * 8), 7 + (32 * 9), + 25 + (32 * 4), 26 + (32 * 3), + 27 + (32 * 1), 17 + (32 * 2), + 4 + (32 * 6), 31 + (32 * 7), + 0 + (32 * 8), 18 + (32 * 9), + 6 + (32 * 6), 5 + (32 * 7), + 4 + (32 * 2), 10 + (32 * 2) })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c new file mode 100644 index 00000000000..4d46ff02192 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_1_run.c @@ -0,0 +1,79 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vec_perm_1.c" + +#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, \ + VALUES1, VALUES2, MASK) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + MASK_TYPE mask = MASK; \ + TYPE dest; \ + dest = vec_perm_##TYPE (values1, values2, mask); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_VEC_PERM (v4di, v4di, + ((v4di) { 5, 36, 7, 48 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 }), + ((v4di) { 1, 6, 3, 7 })); + TEST_VEC_PERM (v8si, v8si, + ((v8si) { 34, 38, 40, 10, 9, 8, 7, 35 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 }), + ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hi, v16hi, + ((v16hi) { 12, 16, 18, 10, 42, 43, 44, 34, + 7, 48, 3, 35, 9, 8, 7, 13 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 
45, 46, 47, 48 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (v32qi, v32qi, + ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 12, 24, 36, 48, 12, 24 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 }), + ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61, + 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61 })); + TEST_VEC_PERM (v4df, v4di, + ((v4df) { 5.1, 36.1, 7.1, 48.1 }), + ((v4df) { 4.1, 5.1, 6.1, 7.1 }), + ((v4df) { 12.1, 24.1, 36.1, 48.1 }), + ((v4di) { 1, 6, 3, 7 })); + TEST_VEC_PERM (v8sf, v8si, + ((v8sf) { 34.2, 38.2, 40.2, 10.2, 9.2, 8.2, 7.2, 35.2 }), + ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((v8sf) { 33.2, 34.2, 35.2, 36.2, + 37.2, 38.2, 39.2, 40.2 }), + ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 42.0, 43.0, 44.0, 34.0, + 7.0, 48.0, 3.0, 35.0, 9.0, 8.0, 7.0, 13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c new file mode 100644 index 00000000000..31cff7ab113 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_reverse_##TYPE (TYPE *restrict 
a, TYPE *restrict b, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + a[i] = b[n - i - 1]; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\trev\tz[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c new file mode 100644 index 00000000000..342b1ddb44d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_2_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vec_perm_2.c" + +#define N 153 + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + b[i] = i * 2 + i % 5; \ + vec_reverse_##TYPE (a, b, N); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE expected = (N - i - 1) * 2 + (N - i - 1) % 5; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c new file mode 100644 index 00000000000..4f70abd35e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_zip_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + TYPE *restrict c, long n) \ +{ \ + for (long i = 0; i < n; ++i) \ + { \ + 
a[i * 8] = c[i * 4]; \ + a[i * 8 + 1] = b[i * 4]; \ + a[i * 8 + 2] = c[i * 4 + 1]; \ + a[i * 8 + 3] = b[i * 4 + 1]; \ + a[i * 8 + 4] = c[i * 4 + 2]; \ + a[i * 8 + 5] = b[i * 4 + 2]; \ + a[i * 8 + 6] = c[i * 4 + 3]; \ + a[i * 8 + 7] = b[i * 4 + 3]; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 24 } } */ +/* Currently we can't use SLP for groups bigger than 128 bits. */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 36 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 36 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c new file mode 100644 index 00000000000..14d66f99383 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_3_run.c @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vec_perm_3.c" + +#define N (43 * 8) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[N], c[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + b[i] = i * 2 + i % 5; \ + c[i] = i * 3; \ + } \ + vec_zip_##TYPE (a, b, c, N / 8); \ + for (unsigned 
int i = 0; i < N / 2; ++i) \ + { \ + TYPE expected1 = i * 3; \ + TYPE expected2 = i * 2 + i % 5; \ + if (a[i * 2] != expected1 || a[i * 2 + 1] != expected2) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c new file mode 100644 index 00000000000..5fbd59f08bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_uzp_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + TYPE *restrict c, long n) \ +{ \ + for (long i = 0; i < n; ++i) \ + { \ + a[i * 4] = c[i * 8]; \ + b[i * 4] = c[i * 8 + 1]; \ + a[i * 4 + 1] = c[i * 8 + 2]; \ + b[i * 4 + 1] = c[i * 8 + 3]; \ + a[i * 4 + 2] = c[i * 8 + 4]; \ + b[i * 4 + 2] = c[i * 8 + 5]; \ + a[i * 4 + 3] = c[i * 8 + 6]; \ + b[i * 4 + 3] = c[i * 8 + 7]; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* We could use a single uzp1 and uzp2 per function by implementing + SLP load permutation for variable width. XFAIL until then. 
*/ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 { xfail *-*-* } } } */ +/* Delete these if the tests above start passing instead. */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 24 } } */ +/* { dg-final { scan-assembler-times {\tuzp2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 24 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c new file mode 100644 index 00000000000..404429208a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_4_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_vec_perm_4.c" + +#define N (43 * 8) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[N], c[N]; \ + for (unsigned int i = 0; i < N; ++i) \ + c[i] = i * 2 + i % 5; \ + vec_uzp_##TYPE (a, b, c, N / 8); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE expected = i * 2 + i % 5; 
\ + if ((i & 1 ? b[i / 2] : a[i / 2]) != expected) \ + __builtin_abort (); \ + } \ + } + +int +main (void) +{ + TEST_ALL (HARNESS) +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c new file mode 100644 index 00000000000..e76b3bc5abb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define VEC_PERM_CONST(TYPE, MASK) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_perm_##TYPE (TYPE values1, TYPE values2) \ +{ \ + return __builtin_shuffle (values1, values2, MASK); \ +} + +VEC_PERM_CONST (v4di, ((v4di) { 4, 3, 6, 1 })); +VEC_PERM_CONST (v8si, ((v8si) { 3, 9, 11, 12, 2, 4, 4, 2 })); +VEC_PERM_CONST (v16hi, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0, + 22, 1, 8, 9, 3, 24, 15, 1 })); +VEC_PERM_CONST (v32qi, ((v32qi) { 13, 31, 11, 2, 48, 28, 3, 4, + 54, 11, 30, 1, 0, 61, 2, 3, + 4, 5, 11, 63, 24, 11, 42, 39, + 2, 57, 22, 11, 6, 16, 18, 21 })); +VEC_PERM_CONST (v4df, ((v4di) { 7, 3, 2, 1 })); +VEC_PERM_CONST (v8sf, ((v8si) { 1, 9, 13, 11, 2, 5, 4, 2 })); +VEC_PERM_CONST (v16hf, ((v16hi) { 8, 27, 5, 4, 21, 12, 13, 0, + 22, 1, 8, 9, 3, 24, 15, 1 })); + +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { 
scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c new file mode 100644 index 00000000000..b4f82091f7c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_overrun.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define VEC_PERM_CONST_OVERRUN(TYPE, MASK) \ +TYPE vec_perm_overrun_##TYPE (TYPE values1, TYPE values2) \ +{ \ + return __builtin_shuffle (values1, values2, MASK); \ +} + +VEC_PERM_CONST_OVERRUN (v4di, ((v4di) { 4 + (8 * 1), 3 + (8 * 1), + 6 + (8 * 2), 1 + (8 * 3) })); +VEC_PERM_CONST_OVERRUN (v8si, ((v8si) { 3 + (16 * 3), 9 + (16 * 4), + 11 + (16 * 5), 12 + (16 * 3), + 2 + (16 * 2), 4 + (16 * 1), + 4 + (16 * 2), 2 + (16 * 1) })); +VEC_PERM_CONST_OVERRUN (v16hi, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1), + 5 + (32 * 3), 4 + (32 * 3), + 21 + (32 * 1), 12 + (32 * 3), + 13 + (32 * 3), 0 + (32 * 1), + 22 + (32 * 2), 1 + (32 * 2), + 8 + (32 * 2), 9 + (32 * 1), + 3 + (32 * 2), 24 + (32 * 2), + 15 + (32 * 1), 1 + (32 * 1) })); +VEC_PERM_CONST_OVERRUN (v32qi, ((v32qi) { 13 + (64 * 2), 31 + (64 * 2), + 11 + (64 * 2), 2 + (64 * 1), + 48 + (64 * 1), 28 + (64 * 2), + 3 + (64 * 2), 4 + (64 * 3), + 54 + (64 * 1), 11 + (64 * 2), + 30 + (64 * 2), 1 + (64 * 1), + 0 + (64 * 1), 61 + (64 * 2), + 2 + (64 * 3), 3 + (64 * 2), + 4 + (64 * 3), 5 + (64 * 3), + 11 + (64 * 3), 63 + (64 * 1), + 24 + (64 * 1), 11 + (64 * 
3), + 42 + (64 * 3), 39 + (64 * 2), + 2 + (64 * 2), 57 + (64 * 3), + 22 + (64 * 3), 11 + (64 * 2), + 6 + (64 * 2), 16 + (64 * 2), + 18 + (64 * 2), 21 + (64 * 3) })); +VEC_PERM_CONST_OVERRUN (v4df, ((v4di) { 7 + (8 * 1), 3 + (8 * 3), + 2 + (8 * 5), 1 + (8 * 3) })); +VEC_PERM_CONST_OVERRUN (v8sf, ((v8si) { 1 + (16 * 1), 9 + (16 * 2), + 13 + (16 * 2), 11 + (16 * 3), + 2 + (16 * 2), 5 + (16 * 2), + 4 + (16 * 4), 2 + (16 * 3) })); +VEC_PERM_CONST_OVERRUN (v16hf, ((v16hi) { 8 + (32 * 3), 27 + (32 * 1), + 5 + (32 * 3), 4 + (32 * 3), + 21 + (32 * 1), 12 + (32 * 3), + 13 + (32 * 3), 0 + (32 * 1), + 22 + (32 * 2), 1 + (32 * 2), + 8 + (32 * 2), 9 + (32 * 1), + 3 + (32 * 2), 24 + (32 * 2), + 15 + (32 * 1), 1 + (32 * 1) })); + +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c new file mode 100644 index 00000000000..7324c1da0a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_1_run.c @@ -0,0 +1,70 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vec_perm_const_1.c" +#include "sve_vec_perm_const_1_overrun.c" + +#define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + TYPE dest; \ + dest = vec_perm_##TYPE (values1, values2); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ + TYPE dest2; \ + 
dest2 = vec_perm_overrun_##TYPE (values1, values2); \ + if (__builtin_memcmp (&dest2, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_VEC_PERM (v4di, + ((v4di) { 12, 7, 36, 5 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 })); + TEST_VEC_PERM (v8si, + ((v8si) { 6, 34, 36, 37, 5, 7, 7, 5 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_VEC_PERM (v16hi, + ((v16hi) { 11, 44, 8, 7, 38, 15, 16, 3, + 39, 4, 11, 12, 6, 41, 18, 4 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_VEC_PERM (v32qi, + ((v32qi) { 5, 7, 7, 6, 12, 4, 7, 4, + 36, 7, 6, 5, 4, 24, 6, 7, + 4, 5, 7, 48, 4, 7, 36, 48, + 6, 24, 6, 7, 6, 4, 6, 5 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_VEC_PERM (v4df, + ((v4df) { 48.5, 7.5, 6.5, 5.5 }), + ((v4df) { 4.5, 5.5, 6.5, 7.5 }), + ((v4df) { 12.5, 24.5, 36.5, 48.5 })); + TEST_VEC_PERM (v8sf, + ((v8sf) { 4.5, 34.5, 38.5, 36.5, 5.5, 8.5, 7.5, 5.5 }), + ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), + ((v8sf) { 33.5, 34.5, 35.5, 36.5, + 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (v16hf, + ((v16hf) { 11.0, 44.0, 8.0, 7.0, 38.0, 15.0, 16.0, 3.0, + 39.0, 4.0, 11.0, 12.0, 6.0, 41.0, 18.0, 4.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c new file mode 100644 index 
00000000000..a4efb4fea79 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define VEC_PERM_SINGLE(TYPE, MASK) \ +TYPE vec_perm_##TYPE (TYPE values1, TYPE values2) \ +{ \ + return __builtin_shuffle (values1, values2, MASK); \ +} + +VEC_PERM_SINGLE (v4di, ((v4di) { 0, 3, 2, 1 })); +VEC_PERM_SINGLE (v8si, ((v8si) { 3, 7, 1, 0, 2, 4, 4, 2 })); +VEC_PERM_SINGLE (v16hi, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0, + 1, 1, 8, 9, 3, 14, 15, 1 })); +VEC_PERM_SINGLE (v32qi, ((v32qi) { 13, 21, 11, 2, 8, 28, 3, 4, + 14, 11, 30, 1, 0, 31, 2, 3, + 4, 5, 11, 23, 24, 11, 12, 9, + 2, 7, 22, 11, 6, 16, 18, 21 })); +VEC_PERM_SINGLE (v4df, ((v4di) { 3, 3, 1, 1 })); +VEC_PERM_SINGLE (v8sf, ((v8si) { 4, 5, 6, 0, 2, 7, 4, 2 })); +VEC_PERM_SINGLE (v16hf, ((v16hi) { 8, 7, 5, 4, 11, 12, 13, 0, + 1, 1, 8, 9, 3, 14, 15, 1 })); + +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c new file mode 100644 index 00000000000..fbae30c8d1c --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_const_single_1_run.c @@ -0,0 +1,65 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vec_perm_const_single_1.c" + +#define TEST_VEC_PERM(TYPE, EXPECTED_RESULT, VALUES1, VALUES2) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values1 = VALUES1; \ + TYPE values2 = VALUES2; \ + TYPE dest; \ + dest = vec_perm_##TYPE (values1, values2); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_VEC_PERM (v4di, + ((v4di) { 4, 7, 6, 5 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 12, 24, 36, 48 })); + TEST_VEC_PERM (v8si, + ((v8si) { 6, 10, 4, 3, 5, 7, 7, 5 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 33, 34, 35, 36, 37, 38, 39, 40 })); + TEST_VEC_PERM (v16hi, + ((v16hi) { 11, 10, 8, 7, 14, 15, 16, 3, + 4, 4, 11, 12, 6, 17, 18, 4 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48 })); + TEST_VEC_PERM (v32qi, + ((v32qi) { 5, 5, 7, 6, 4, 4, 7, 4, + 6, 7, 6, 5, 4, 7, 6, 7, + 4, 5, 7, 7, 4, 7, 4, 5, + 6, 7, 6, 7, 6, 4, 6, 5 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48, + 12, 24, 36, 48, 12, 24, 36, 48 })); + TEST_VEC_PERM (v4df, + ((v4df) { 7.5, 7.5, 5.5, 5.5 }), + ((v4df) { 4.5, 5.5, 6.5, 7.5 }), + ((v4df) { 12.5, 24.5, 36.5, 48.5 })); + TEST_VEC_PERM (v8sf, + ((v8sf) { 7.5, 8.5, 9.5, 3.5, 5.5, 10.5, 7.5, 5.5 }), + ((v8sf) { 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5 }), + ((v8sf) { 33.5, 34.5, 35.5, 36.5, + 37.5, 38.5, 39.5, 40.5 })); + TEST_VEC_PERM (v16hf, + ((v16hf) { 11.0, 10.0, 8.0, 7.0, 
14.0, 15.0, 16.0, 3.0, + 4.0, 4.0, 11.0, 12.0, 6.0, 17.0, 18.0, 4.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hf) { 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, + 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c new file mode 100644 index 00000000000..a82b57dc378 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define VEC_PERM(TYPE, MASKTYPE) \ +TYPE vec_perm_##TYPE (TYPE values, MASKTYPE mask) \ +{ \ + return __builtin_shuffle (values, mask); \ +} + +VEC_PERM (v4di, v4di) +VEC_PERM (v8si, v8si) +VEC_PERM (v16hi, v16hi) +VEC_PERM (v32qi, v32qi) +VEC_PERM (v4df, v4di) +VEC_PERM (v8sf, v8si) +VEC_PERM (v16hf, v16hi) + +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\ttbl\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c new file mode 100644 index 00000000000..539c99d4f61 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve_vec_perm_single_1_run.c @@ -0,0 +1,65 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O -march=armv8-a+sve" } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" { target aarch64_sve256_hw } } */ + +#include "sve_vec_perm_single_1.c" +extern void abort (void); + +#define TEST_VEC_PERM(TYPE, MASK_TYPE, EXPECTED_RESULT, VALUES, MASK) \ +{ \ + TYPE expected_result = EXPECTED_RESULT; \ + TYPE values = VALUES; \ + MASK_TYPE mask = MASK; \ + TYPE dest; \ + dest = vec_perm_##TYPE (values, mask); \ + if (__builtin_memcmp (&dest, &expected_result, sizeof (TYPE)) != 0) \ + __builtin_abort (); \ +} + +int main (void) +{ + TEST_VEC_PERM (v4di, v4di, + ((v4di) { 5, 6, 7, 5 }), + ((v4di) { 4, 5, 6, 7 }), + ((v4di) { 1, 6, 3, 5 })); + TEST_VEC_PERM (v8si, v8si, + ((v8si) { 4, 8, 10, 10, 9, 8, 7, 5 }), + ((v8si) { 3, 4, 5, 6, 7, 8, 9, 10 }), + ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hi, v16hi, + ((v16hi) { 12, 16, 18, 10, 12, 13, 14, 4, + 7, 18, 3, 5, 9, 8, 7, 13 }), + ((v16hi) { 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + TEST_VEC_PERM (v32qi, v32qi, + ((v32qi) { 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 4, 5, 6, 7, 4, 5, + 5, 6, 7, 4, 5, 6, 4, 5, + 6, 7, 4, 5, 6, 7, 4, 5 }), + ((v32qi) { 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7, + 4, 5, 6, 7, 4, 5, 6, 7 }), + ((v32qi) { 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61, + 5, 6, 7, 8, 9, 10, 28, 29, + 30, 31, 32, 33, 54, 55, 56, 61 })); + TEST_VEC_PERM (v4df, v4di, + ((v4df) { 5.1, 6.1, 7.1, 5.1 }), + ((v4df) { 4.1, 5.1, 6.1, 7.1 }), + ((v4di) { 1, 6, 3, 5 })); + TEST_VEC_PERM (v8sf, v8si, + ((v8sf) { 4.2, 8.2, 10.2, 10.2, 9.2, 8.2, 7.2, 5.2 }), + ((v8sf) { 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 9.2, 10.2 }), + ((v8si) { 9, 13, 15, 7, 6, 5, 4, 10 })); + TEST_VEC_PERM (v16hf, v16hi, + ((v16hf) { 12.0, 16.0, 18.0, 10.0, 
12.0, 13.0, 14.0, 4.0, + 7.0, 18.0, 3.0, 5.0, 9.0, 8.0, 7.0, 13.0 }), + ((v16hf) { 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 }), + ((v16hi) { 9, 13, 15, 7, 25, 26, 27, 17, + 4, 31, 0, 18, 6, 5, 4, 10 })); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c new file mode 100644 index 00000000000..c54db87fa21 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_1.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_while_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + a[i] += 1; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c new file mode 100644 index 00000000000..62f82cc43f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_2.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_while_##TYPE (TYPE *restrict a, unsigned int n) \ +{ \ + for (unsigned int i = 0; i < n; ++i) \ + a[i] += 1; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, 
x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c new file mode 100644 index 00000000000..ace7ebc5a0f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_3.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_while_##TYPE (TYPE *restrict a, long n) \ +{ \ + for (long i = 0; i < n; ++i) \ + a[i] += 1; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-not {\tuqdec} } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ +/* { dg-final { 
scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_4.c b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c new file mode 100644 index 00000000000..0717eac1ff6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_4.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((weak)) \ +vec_while_##TYPE (TYPE *restrict a, unsigned long n) \ +{ \ + for (unsigned long i = 0; i < n; ++i) \ + a[i] += 1; \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-times {\tuqdec} 2 } } */ +/* { dg-final { scan-assembler-times {\tuqdecb\tx[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ 
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c new file mode 100644 index 00000000000..ead821b43ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_1.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +int +loop (short b) +{ + int c = 0; +l1: + b++; + c |= b; + if (b) + goto l1; + return c; +} + +/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c new file mode 100644 index 00000000000..1a3502a0f94 --- /dev/null 
+++ b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_2.c @@ -0,0 +1,16 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +int +loop (short b) +{ + int c = 0; +l1: + b++; + c |= b; + if (b < 32767) + goto l1; +return c; +} + +/* { dg-final { scan-assembler-times {\tadd\tx[0-9], x[0-9], 1\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c new file mode 100644 index 00000000000..125fc31a464 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_while_maxiter_3.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -march=armv8-a+sve --save-temps" } */ + +int +loop (short b) +{ + int c = 0; +l1: + b++; + c |= b; + if (b < 32766) + goto l1; +return c; +} + +/* { dg-final { scan-assembler-not {\tmov\tx[0-9], 65536\n} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9], 0\n} } } */ +/* { dg-final { scan-assembler-not {\tcsel\tx[0-9], x[0-9], x[0-9], ne\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c new file mode 100644 index 00000000000..918313f62bd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_zip1_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#ifndef BIAS +#define BIAS 0 +#endif + +#include <stdint.h> + +typedef int64_t v4di __attribute__((vector_size (32))); +typedef int32_t v8si __attribute__((vector_size (32))); +typedef int16_t v16hi __attribute__((vector_size (32))); +typedef int8_t v32qi __attribute__((vector_size (32))); +typedef double v4df __attribute__((vector_size (32))); +typedef float v8sf __attribute__((vector_size (32))); +typedef _Float16 v16hf __attribute__((vector_size (32))); + +#define MASK_2(X, Y) X, Y + X +#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 1, Y) +#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 2, Y) +#define MASK_16(X, Y) MASK_8 (X, Y), 
MASK_8 (X + 4, Y) +#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 8, Y) + +#define INDEX_4 v4di +#define INDEX_8 v8si +#define INDEX_16 v16hi +#define INDEX_32 v32qi + +#define PERMUTE(TYPE, NUNITS) \ + TYPE permute_##TYPE (TYPE values1, TYPE values2) \ + { \ + return __builtin_shuffle \ + (values1, values2, \ + ((INDEX_##NUNITS) { MASK_##NUNITS (BIAS * (NUNITS / 2), \ + NUNITS) })); \ + } + +#define TEST_ALL(T) \ + T (v4di, 4) \ + T (v8si, 8) \ + T (v16hi, 16) \ + T (v32qi, 32) \ + T (v4df, 4) \ + T (v8sf, 8) \ + T (v16hf, 16) + +TEST_ALL (PERMUTE) + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c b/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c new file mode 100644 index 00000000000..40a899bc40a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_zip2_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O -march=armv8-a+sve -msve-vector-bits=256" } */ + +#define BIAS 1 +#include "sve_zip1_1.c" + +/* { dg-final { scan-assembler-not {\ttbl\t} } } */ + +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c index 3d55ecfdb32..7bdd7baff64 100644 --- a/gcc/testsuite/gcc.target/aarch64/uaddw-1.c +++ 
b/gcc/testsuite/gcc.target/aarch64/uaddw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int t6(int len, void * dummy, unsigned short * __restrict x) diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c index fd3b578c0bb..77fec533c68 100644 --- a/gcc/testsuite/gcc.target/aarch64/uaddw-2.c +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int t6(int len, void * dummy, unsigned short * __restrict x) diff --git a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c index 499af511521..826606d705e 100644 --- a/gcc/testsuite/gcc.target/aarch64/uaddw-3.c +++ b/gcc/testsuite/gcc.target/aarch64/uaddw-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ int t6(int len, void * dummy, char * __restrict x) diff --git a/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c index 19082d73ea8..959332937ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c @@ -6,7 +6,7 @@ #include "vect-abs.x" -/* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.16b" } } */ -/* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.8h" } } */ +/* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.16b" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.8h" { xfail *-*-* } } } */ /* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.4s" } } */ /* { dg-final { scan-assembler "abs\\tv\[0-9\]+\.2d" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c b/gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c index 69afff1dd14..9f18eb6a52f 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c +++ 
b/gcc/testsuite/gcc.target/aarch64/vect-add-sub-cond.c @@ -1,7 +1,7 @@ /* Make sure that vector comaprison results are not unnecessarily ANDed with vectors of 1. */ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a" } */ #define COUNT1(X) if (X) count += 1 #define COUNT2(X) if (X) count -= 1 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-compile.c index 33130aab55d..b27f01a338a 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-compile.c @@ -1,6 +1,5 @@ - /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ #include "vect.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-faddv-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-faddv-compile.c index cce9240343f..47a7dc385fc 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-faddv-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-faddv-compile.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -ffast-math" } */ +/* { dg-options "-O3 -ffast-math -march=armv8-a" } */ #include "vect-faddv.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c index 4640f571715..e7a15c022b7 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model -march=armv8-a" } */ #define FTYPE double #define ITYPE long diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c index 6d5fdb59c81..5c3b5d7249d 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -march=armv8-a" } */ #define FTYPE float #define ITYPE int diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c index f5b6329142d..28c6f8ffd1d 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model -march=armv8-a" } */ #define FTYPE double #define ITYPE long diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c index 9561b7159de..608a7ce97f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -march=armv8-a" } */ #define FTYPE float #define ITYPE int diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c index 28d7ab6c443..5fdf1ebc730 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline 
-fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model -march=armv8-a" } */ #define FTYPE double #define ITYPE long diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c index 20abbd544ca..6c7622e7871 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -march=armv8-a" } */ #define FTYPE float #define ITYPE int diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin-compile.c index 1285a506320..2aec6475786 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin-compile.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -ffast-math" } */ +/* { dg-options "-O3 -ffast-math -march=armv8-a" } */ #include "vect-fmax-fmin.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv-compile.c index 975cef9c584..0de6dc1e785 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmaxv-fminv-compile.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -ffast-math -fno-vect-cost-model" } */ +/* { dg-options "-O3 -ffast-math -fno-vect-cost-model -march=armv8-a" } */ #include "vect-fmaxv-fminv.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c index bfd327cb346..0ccec00155c 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c +++ 
b/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model -march=armv8-a" } */ #define N 32 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c index 53aa66cff54..14e67f9c50e 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model -march=armv8-a" } */ #define N 32 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c index 8eb8a65eb99..a9865060e07 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model -march=armv8-a" } */ #define N 32 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c index 90d911cae6e..9943b2101ad 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model -march=armv8-a" } */ #define N 32 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c index 47ef100e87b..c1985fd8c36 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c 
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c @@ -1,7 +1,5 @@ - - /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ #include "vect-fp.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c index 4711c612002..85096870dba 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile-fp.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fno-vect-cost-model" } */ +/* { dg-options "-O3 -fno-vect-cost-model -march=armv8-a" } */ #include "stdint.h" #include "vect-ld1r.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile.c index 761777f794c..a1faf1910fe 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-ld1r-compile.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fno-vect-cost-model" } */ +/* { dg-options "-O3 -fno-vect-cost-model -march=armv8-a" } */ #include "stdint.h" #include "vect-ld1r.x" diff --git a/gcc/testsuite/gcc.target/aarch64/vect-movi.c b/gcc/testsuite/gcc.target/aarch64/vect-movi.c index 53bb491ee64..5450ce263b6 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-movi.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-movi.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 --save-temps -fno-inline" } */ +/* { dg-options "-O3 --save-temps -fno-inline -march=armv8-a" } */ extern void abort (void); @@ -45,10 +45,21 @@ mvni_msl16 (int *__restrict a) a[i] = 0xff540000; } +static void +movi_float_lsl24 (float * a) +{ + int i; + + /* { dg-final { scan-assembler {\tmovi\tv[0-9]+\.[42]s, 0x43, lsl 24\n} } } */ + for (i = 0; i < N; i++) + a[i] = 128.0; +} + int main (void) { int a[N] = { 0 }; + float b[N] = { 0 }; int i; #define CHECK_ARRAY(a, val) \ @@ -68,6 +79,9 @@ main (void) mvni_msl16 (a); CHECK_ARRAY (a, 
0xff540000); + movi_float_lsl24 (b); + CHECK_ARRAY (b, 128.0); + return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/vect-mull-compile.c b/gcc/testsuite/gcc.target/aarch64/vect-mull-compile.c index e90c97ff326..4b87fcce4a9 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-mull-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-mull-compile.c @@ -1,6 +1,5 @@ - /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8-a" } */ #define N 16 diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-or_1.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-or_1.c index 6261e9d1ea6..e500a74a41f 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-reduc-or_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-or_1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model -march=armv8-a" } */ /* Write a reduction loop to be reduced using whole vector right shift. 
*/ extern void abort (void); diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c index 7501825b0cb..85e3e0714d2 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c @@ -8,7 +8,7 @@ extern float fabsf (float); extern double fabs (double); #define NUM_TESTS 16 -#define DELTA 0.000001 +#define DELTA 0.0001 int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76, -4, 34, 110, -110, 6, 4, 75, -34}; diff --git a/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c index 659b949c6c4..6d134b15b14 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */ +/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf -march=armv8-a" } */ typedef signed char S8_t; typedef signed short S16_t; diff --git a/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c b/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c index c191d2eba10..856d34153c6 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf" } */ +/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model -fno-ipa-icf -march=armv8-a" } */ typedef signed char S8_t; typedef signed short S16_t; diff --git a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c index bf43f1cd72e..c7c15ee5c4a 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c @@ -49,5 +49,4 @@ f12 (void) return sum; } - /* { dg-final { scan-assembler-not "sp" } } */ diff 
--git a/gcc/testsuite/gfortran.dg/ieee/ieee_8.f90 b/gcc/testsuite/gfortran.dg/ieee/ieee_8.f90 index a47f9c16b91..75458c2eb55 100644 --- a/gcc/testsuite/gfortran.dg/ieee/ieee_8.f90 +++ b/gcc/testsuite/gfortran.dg/ieee/ieee_8.f90 @@ -1,4 +1,4 @@ -! { dg-do run { xfail aarch64*-*-gnu arm*-*-gnueabi arm*-*-gnueabihf } } +! { dg-do run { xfail arm*-*-gnueabi arm*-*-gnueabihf } } ! XFAIL because of PR libfortran/78449. module foo diff --git a/gcc/testsuite/gfortran.dg/reassoc_10.f b/gcc/testsuite/gfortran.dg/reassoc_10.f index 3720d8f414e..4a0a6c23f03 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_10.f +++ b/gcc/testsuite/gfortran.dg/reassoc_10.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Q,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_7.f b/gcc/testsuite/gfortran.dg/reassoc_7.f index 04d2e678fa6..fce93a8a8d0 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_7.f +++ b/gcc/testsuite/gfortran.dg/reassoc_7.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_8.f b/gcc/testsuite/gfortran.dg/reassoc_8.f index a8aaa6008e8..3a169010cdd 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_8.f +++ b/gcc/testsuite/gfortran.dg/reassoc_8.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_9.f b/gcc/testsuite/gfortran.dg/reassoc_9.f index 5d9d15fa735..5ed53db1fda 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_9.f +++ b/gcc/testsuite/gfortran.dg/reassoc_9.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! 
{ dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f index 7e2816b8b4f..2e0840d256b 100644 --- a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f +++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f @@ -42,5 +42,5 @@ C ! vectorized loop. If vector factor is 2, the vectorized loop can ! be predictive commoned, we check if predictive commoning PHI node ! is created with vector(2) type. -! { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } -! { dg-final { scan-tree-dump "vectp_u.*__lsm.* = PHI <.*vectp_u.*__lsm" "pcom" } } +! { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" { xfail vect_variable_length } } } +! { dg-final { scan-tree-dump "vectp_u.*__lsm.* = PHI <.*vectp_u.*__lsm" "pcom" { xfail vect_variable_length } } } diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 index 8e18be5eebd..c86cf008dd2 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 @@ -704,5 +704,6 @@ CALL track('KERNEL ') RETURN END SUBROUTINE kernel -! { dg-final { scan-tree-dump-times "vectorized 21 loops" 1 "vect" { target { vect_intdouble_cvt } } } } ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { ! vect_intdouble_cvt } } } } +! { dg-final { scan-tree-dump-times "vectorized 21 loops" 1 "vect" { target { vect_intdouble_cvt && { ! vect_ieee_add_reduc } } } } } +! 
{ dg-final { scan-tree-dump-times "vectorized 25 loops" 1 "vect" { target { vect_intdouble_cvt && vect_ieee_add_reduc } } } } diff --git a/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 b/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 new file mode 100644 index 00000000000..ea9ba85de7c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 @@ -0,0 +1,102 @@ +! { dg-do run } +! { dg-additional-options "-fno-inline" } + +#define N 200 + +#define TEST_VALUE(I) ((I) * 5 / 2) + +subroutine setup(a) + real :: a(N) + do i = 1, N + a(i) = TEST_VALUE(i) + end do +end subroutine + +subroutine check(a, x, gap) + real :: a(N), temp, x + integer :: gap + do i = 1, N - gap + temp = a(i + gap) + x + if (a(i) /= temp) call abort + end do + do i = N - gap + 1, N + temp = TEST_VALUE(i) + if (a(i) /= temp) call abort + end do +end subroutine + +subroutine testa(a, x, base, n) + real :: a(n), x + integer :: base, n + do i = n, 2, -1 + a(base + i - 1) = a(base + i) + x + end do +end subroutine testa + +subroutine testb(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 4, -1 + a(base + i - 3) = a(base + i) + x + end do +end subroutine testb + +subroutine testc(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 8, -1 + a(base + i - 7) = a(base + i) + x + end do +end subroutine testc + +subroutine testd(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 16, -1 + a(base + i - 15) = a(base + i) + x + end do +end subroutine testd + +subroutine teste(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 32, -1 + a(base + i - 31) = a(base + i) + x + end do +end subroutine teste + +subroutine testf(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 64, -1 + a(base + i - 63) = a(base + i) + x + end do +end subroutine testf + +program main + real :: a(N) + + call setup(a) + call testa(a, 91.0, 0, N) + call check(a, 91.0, 1) + + call setup(a) + call testb(a, 55.0, 0, N) + call check(a, 55.0, 3) + + call 
setup(a) + call testc(a, 72.0, 0, N) + call check(a, 72.0, 7) + + call setup(a) + call testd(a, 69.0, 0, N) + call check(a, 69.0, 15) + + call setup(a) + call teste(a, 44.0, 0, N) + call check(a, 44.0, 31) + + call setup(a) + call testf(a, 39.0, 0, N) + call check(a, 39.0, 63) +end program diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index d7ef04f5221..b2096723426 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2889,6 +2889,13 @@ proc check_effective_target_base_quadfloat_support { } { return 1 } +# Return 1 if the target supports all four forms of fused multiply-add +# (fma, fms, fnma, and fnms) for both float and double. + +proc check_effective_target_all_scalar_fma { } { + return [istarget aarch64*-*-*] +} + # Return 1 if the target supports compiling fixed-point, # 0 otherwise. @@ -3290,7 +3297,8 @@ proc check_effective_target_vect_peeling_profitable { } { } else { set et_vect_peeling_profitable_saved($et_index) 1 if { ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) } { + && [check_effective_target_s390_vx]) + || [check_effective_target_vect_element_align_preferred] } { set et_vect_peeling_profitable_saved($et_index) 0 } } @@ -3349,6 +3357,35 @@ proc check_effective_target_aarch64_little_endian { } { }] } +# Return 1 if this is an AArch64 target supporting SVE. +proc check_effective_target_aarch64_sve { } { + if { ![istarget aarch64*-*-*] } { + return 0 + } + return [check_no_compiler_messages aarch64_sve assembly { + #if !defined (__ARM_FEATURE_SVE) + #error FOO + #endif + }] +} + +# Return the size in bits of an SVE vector, or 0 if the size is variable. 
+proc aarch64_sve_bits { } { + return [check_cached_effective_target aarch64_sve_bits { + global tool + + set src dummy[pid].c + set f [open $src "w"] + puts $f "int bits = __ARM_FEATURE_SVE_BITS;" + close $f + set output [${tool}_target_compile $src "" preprocess ""] + file delete $src + + regsub {.*bits = ([^;]*);.*} $output {\1} bits + expr { $bits } + }] +} + # Return 1 if this is a compiler supporting ARC atomic operations proc check_effective_target_arc_atomic { } { return [check_no_compiler_messages arc_atomic assembly { @@ -4274,6 +4311,49 @@ proc check_effective_target_arm_neon_hw { } { } [add_options_for_arm_neon ""]] } +# Return true if this is an AArch64 target that can run SVE code. + +proc check_effective_target_aarch64_sve_hw { } { + if { ![istarget aarch64*-*-*] } { + return 0 + } + return [check_runtime aarch64_sve_hw_available { + int + main (void) + { + asm volatile ("ptrue p0.b"); + return 0; + } + }] +} + +# Return true if this is an AArch64 target that can run SVE code and +# if its SVE vectors have exactly BITS bits. + +proc aarch64_sve_hw_bits { bits } { + if { ![check_effective_target_aarch64_sve_hw] } { + return 0 + } + return [check_runtime aarch64_sve${bits}_hw [subst { + int + main (void) + { + int res; + asm volatile ("cntd %0" : "=r" (res)); + if (res * 64 != $bits) + __builtin_abort (); + return 0; + } + }]] +} + +# Return true if this is an AArch64 target that can run SVE code and +# if its SVE vectors have exactly 256 bits. 
+ +proc check_effective_target_aarch64_sve256_hw { } { + return [aarch64_sve_hw_bits 256] +} + proc check_effective_target_arm_neonv2_hw { } { return [check_runtime arm_neon_hwv2_available { #include "arm_neon.h" @@ -5530,7 +5610,8 @@ proc check_effective_target_vect_perm { } { } else { set et_vect_perm_saved($et_index) 0 if { [is-effective-target arm_neon] - || [istarget aarch64*-*-*] + || ([istarget aarch64*-*-*] + && ![check_effective_target_vect_variable_length]) || [istarget powerpc*-*-*] || [istarget spu-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] @@ -5547,6 +5628,86 @@ proc check_effective_target_vect_perm { } { return $et_vect_perm_saved($et_index) } +# Return 1 if, for some VF: +# +# - the target's default vector size is VF * ELEMENT_BITS bits +# +# - it is possible to implement the equivalent of: +# +# int<ELEMENT_BITS>_t s1[COUNT][COUNT * VF], s2[COUNT * VF]; +# for (int i = 0; i < COUNT; ++i) +# for (int j = 0; j < COUNT * VF; ++j) +# s1[i][j] = s2[j - j % COUNT + i] +# +# using only a single 2-vector permute for each vector in s1. +# +# E.g. for COUNT == 3 and vector length 4, the two arrays would be: +# +# s2 | a0 a1 a2 a3 | b0 b1 b2 b3 | c0 c1 c2 c3 +# ------+-------------+-------------+------------ +# s1[0] | a0 a0 a0 a3 | a3 a3 b2 b2 | b2 c1 c1 c1 +# s1[1] | a1 a1 a1 b0 | b0 b0 b3 b3 | b3 c2 c2 c2 +# s1[2] | a2 a2 a2 b1 | b1 b1 c0 c0 | c0 c3 c3 c3 +# +# Each s1 permute requires only two of a, b and c. +# +# The distance between the start of vector n in s1[0] and the start +# of vector n in s2 is: +# +# A = (n * VF) % COUNT +# +# The corresponding value for the end of vector n is: +# +# B = (n * VF + VF - 1) % COUNT +# +# Subtracting i from each value gives the corresponding difference +# for s1[i]. The condition being tested by this function is false +# iff A - i > 0 and B - i < 0 for some i and n, such that the first +# element for s1[i] comes from vector n - 1 of s2 and the last element +# comes from vector n + 1 of s2. 
The condition is therefore true iff +# A <= B for all n. This is turn means the condition is true iff: +# +# (n * VF) % COUNT + (VF - 1) % COUNT < COUNT +# +# for all n. COUNT - (n * VF) % COUNT is bounded by gcd (VF, COUNT), +# and will be that value for at least one n in [0, COUNT), so we want: +# +# (VF - 1) % COUNT < gcd (VF, COUNT) + +proc vect_perm_supported { count element_bits } { + set vector_bits [lindex [available_vector_sizes] 0] + if { $vector_bits <= 0 } { + return 0 + } + set vf [expr { $vector_bits / $element_bits }] + + # Compute gcd (VF, COUNT). + set gcd $vf + set temp1 $count + while { $temp1 > 0 } { + set temp2 [expr { $gcd % $temp1 }] + set gcd $temp1 + set temp1 $temp2 + } + return [expr { ($vf - 1) % $count < $gcd }] +} + +# Return 1 if the target supports SLP permutation of 3 vectors when each +# element has 32 bits. + +proc check_effective_target_vect_perm3_int { } { + return [expr { [check_effective_target_vect_perm] + && [vect_perm_supported 3 32] }] +} + +# Return 1 if the target supports SLP permutation of 5 vectors when each +# element has 32 bits. + +proc check_effective_target_vect_perm5_int { } { + return [expr { [check_effective_target_vect_perm] + && [vect_perm_supported 5 32] }] +} + # Return 1 if the target plus current options supports vector permutation # on byte-sized elements, 0 otherwise. # @@ -5563,7 +5724,8 @@ proc check_effective_target_vect_perm_byte { } { if { ([is-effective-target arm_neon] && [is-effective-target arm_little_endian]) || ([istarget aarch64*-*-*] - && [is-effective-target aarch64_little_endian]) + && [is-effective-target aarch64_little_endian] + && ![check_effective_target_vect_variable_length]) || [istarget powerpc*-*-*] || [istarget spu-*-*] || ([istarget mips-*.*] @@ -5578,6 +5740,14 @@ proc check_effective_target_vect_perm_byte { } { return $et_vect_perm_byte_saved($et_index) } +# Return 1 if the target supports SLP permutation of 3 vectors when each +# element has 8 bits. 
+ +proc check_effective_target_vect_perm3_byte { } { + return [expr { [check_effective_target_vect_perm_byte] + && [vect_perm_supported 3 8] }] +} + # Return 1 if the target plus current options supports vector permutation # on short-sized elements, 0 otherwise. # @@ -5594,7 +5764,8 @@ proc check_effective_target_vect_perm_short { } { if { ([is-effective-target arm_neon] && [is-effective-target arm_little_endian]) || ([istarget aarch64*-*-*] - && [is-effective-target aarch64_little_endian]) + && [is-effective-target aarch64_little_endian] + && ![check_effective_target_vect_variable_length]) || [istarget powerpc*-*-*] || [istarget spu-*-*] || ([istarget mips*-*-*] @@ -5609,6 +5780,14 @@ proc check_effective_target_vect_perm_short { } { return $et_vect_perm_short_saved($et_index) } +# Return 1 if the target supports SLP permutation of 3 vectors when each +# element has 16 bits. + +proc check_effective_target_vect_perm3_short { } { + return [expr { [check_effective_target_vect_perm_short] + && [vect_perm_supported 3 16] }] +} + # Return 1 if the target plus current options supports folding of # copysign into XORSIGN. 
# @@ -5646,7 +5825,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { } else { set et_vect_widen_sum_hi_to_si_pattern_saved($et_index) 0 if { [istarget powerpc*-*-*] - || [istarget aarch64*-*-*] + || ([istarget aarch64*-*-*] + && ![check_effective_target_aarch64_sve]) || [is-effective-target arm_neon] || [istarget ia64-*-*] } { set et_vect_widen_sum_hi_to_si_pattern_saved($et_index) 1 @@ -5758,7 +5938,8 @@ proc check_effective_target_vect_widen_mult_qi_to_hi { } { set et_vect_widen_mult_qi_to_hi_saved($et_index) 0 } if { [istarget powerpc*-*-*] - || [istarget aarch64*-*-*] + || ([istarget aarch64*-*-*] + && ![check_effective_target_aarch64_sve]) || [is-effective-target arm_neon] || ([istarget s390*-*-*] && [check_effective_target_s390_vx]) } { @@ -5796,7 +5977,8 @@ proc check_effective_target_vect_widen_mult_hi_to_si { } { if { [istarget powerpc*-*-*] || [istarget spu-*-*] || [istarget ia64-*-*] - || [istarget aarch64*-*-*] + || ([istarget aarch64*-*-*] + && ![check_effective_target_aarch64_sve]) || [istarget i?86-*-*] || [istarget x86_64-*-*] || [is-effective-target arm_neon] || ([istarget s390*-*-*] @@ -6254,6 +6436,29 @@ proc check_effective_target_vect_natural_alignment { } { return $et_vect_natural_alignment } +# Return true if fully-masked loops are supported. + +proc check_effective_target_vect_fully_masked { } { + return [check_effective_target_aarch64_sve] +} + +# Return 1 if the target doesn't prefer any alignment beyond element +# alignment during vectorization. + +proc check_effective_target_vect_element_align_preferred { } { + return [expr { [check_effective_target_aarch64_sve] + && [check_effective_target_vect_variable_length] }] +} + +# Return 1 if we can align stack data to the preferred vector alignment. 
+ +proc check_effective_target_vect_align_stack_vars { } { + if { [check_effective_target_aarch64_sve] } { + return [check_effective_target_vect_variable_length] + } + return 1 +} + # Return 1 if vector alignment (for types of size 32 bit or less) is reachable, 0 otherwise. proc check_effective_target_vector_alignment_reachable { } { @@ -6303,6 +6508,15 @@ proc check_effective_target_vect_element_align { } { return $et_vect_element_align($et_index) } +# Return 1 if we expect to see unaligned accesses in at least some +# vector dumps. + +proc check_effective_target_vect_unaligned_possible { } { + return [expr { ![check_effective_target_vect_element_align_preferred] + && (![check_effective_target_vect_no_align] + || [check_effective_target_vect_hw_misalign]) }] +} + # Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise. proc check_effective_target_vect_load_lanes { } { @@ -6322,6 +6536,12 @@ proc check_effective_target_vect_load_lanes { } { return $et_vect_load_lanes } +# Return 1 if the target supports vector masked stores. + +proc check_effective_target_vect_masked_store { } { + return [check_effective_target_aarch64_sve] +} + # Return 1 if the target supports vector conditional operations, 0 otherwise. proc check_effective_target_vect_condition { } { @@ -6581,46 +6801,53 @@ foreach N {2 3 4 8} { }] } +# Return the list of vector sizes (in bits) that each target supports. +# A vector length of "0" indicates variable-length vectors. 
+ +proc available_vector_sizes { } { + set result {} + if { [istarget aarch64*-*-*] } { + if { [check_effective_target_aarch64_sve] } { + lappend result [aarch64_sve_bits] + } + lappend result 128 64 + } elseif { [istarget arm*-*-*] + && [check_effective_target_arm_neon_ok] } { + lappend result 128 64 + } elseif { (([istarget i?86-*-*] || [istarget x86_64-*-*]) + && ([check_avx_available] && ![check_prefer_avx128])) } { + lappend result 256 128 + } elseif { [istarget sparc*-*-*] } { + lappend result 64 + } else { + # The traditional default asumption. + lappend result 128 + } + return $result +} + # Return 1 if the target supports multiple vector sizes proc check_effective_target_vect_multiple_sizes { } { - global et_vect_multiple_sizes_saved - global et_index + return [expr { [llength [available_vector_sizes]] > 1 }] +} - set et_vect_multiple_sizes_saved($et_index) 0 - if { [istarget aarch64*-*-*] - || [is-effective-target arm_neon] - || (([istarget i?86-*-*] || [istarget x86_64-*-*]) - && ([check_avx_available] && ![check_prefer_avx128])) } { - set et_vect_multiple_sizes_saved($et_index) 1 - } +# Return true if variable-length vectors are supported. - verbose "check_effective_target_vect_multiple_sizes:\ - returning $et_vect_multiple_sizes_saved($et_index)" 2 - return $et_vect_multiple_sizes_saved($et_index) +proc check_effective_target_vect_variable_length { } { + return [expr { [lindex [available_vector_sizes] 0] == 0 }] } # Return 1 if the target supports vectors of 64 bits. 
proc check_effective_target_vect64 { } { - global et_vect64_saved - global et_index + return [expr { [lsearch -exact [available_vector_sizes] 64] >= 0 }] +} - if [info exists et_vect64_saved($et_index)] { - verbose "check_effective_target_vect64: using cached result" 2 - } else { - set et_vect64_saved($et_index) 0 - if { ([is-effective-target arm_neon] - && [check_effective_target_arm_little_endian]) - || [istarget aarch64*-*-*] - || [istarget sparc*-*-*] } { - set et_vect64_saved($et_index) 1 - } - } +# Return 1 if the target supports vectors of 256 bits. - verbose "check_effective_target_vect64:\ - returning $et_vect64_saved($et_index)" 2 - return $et_vect64_saved($et_index) +proc check_effective_target_vect256 { } { + return [expr { [lsearch -exact [available_vector_sizes] 256] >= 0 }] } # Return 1 if the target supports vector copysignf calls. @@ -6949,6 +7176,45 @@ proc check_effective_target_vect_call_roundf { } { return $et_vect_call_roundf_saved($et_index) } +# Return 1 if the target supports vector gather operations. + +proc check_effective_target_vect_gather { } { + return [check_effective_target_aarch64_sve] +} + +# Return 1 if the target supports vector scatter operations. + +proc check_effective_target_vect_scatter { } { + return [check_effective_target_aarch64_sve] +} + +# Return 1 if the target supports both vector gather and vector scatter +# operations. + +proc check_effective_target_vect_gather_scatter { } { + return [expr { [check_effective_target_vect_gather] + && [check_effective_target_vect_scatter] }] +} + +# Return 1 if the target supports a non-reassociating form of floating-point +# addition reduction, i.e. one that is suitable for -fno-associative-math. + +proc check_effective_target_vect_ieee_add_reduc { } { + return [check_effective_target_aarch64_sve] +} + +# Return 1 if the target supports AND, OR and XOR reduction. 
+ +proc check_effective_target_vect_logical_reduc { } { + return [check_effective_target_aarch64_sve] +} + +# Return 1 if the target supports last-selected-element reduction. + +proc check_effective_target_vect_last_reduc { } { + return [check_effective_target_aarch64_sve] +} + # Return 1 if the target supports section-anchors proc check_effective_target_section_anchors { } { @@ -7747,11 +8013,7 @@ proc check_avx_available { } { # Return true if 32- and 16-bytes vectors are available. proc check_effective_target_vect_sizes_32B_16B { } { - if { [check_avx_available] && ![check_prefer_avx128] } { - return 1; - } else { - return 0; - } + return [expr { [available_vector_sizes] == [list 256 128] }] } # Return true if 16- and 8-bytes vectors are available. @@ -8931,14 +9193,9 @@ proc check_effective_target_autoincdec { } { # proc check_effective_target_supports_stack_clash_protection { } { - # Temporary until the target bits are fully ACK'd. -# if { [istarget aarch*-*-*] } { -# return 1 -# } - if { [istarget x86_64-*-*] || [istarget i?86-*-*] || [istarget powerpc*-*-*] || [istarget rs6000*-*-*] - || [istarget s390*-*-*] } { + || [istarget aarch64*-**] || [istarget s390*-*-*] } { return 1 } return 0 |