summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2016-11-30 19:08:27 +0000
committer Richard Sandiford <richard.sandiford@linaro.org> 2017-11-20 16:01:23 +0000
commit cd6f54736df9dbd2c1c34d07ea25acbdde36e8ca (patch)
tree 9bfe0d3ede7d8fc222948b6c00b64c3083cb9d67
parent 4fd4135ac75a5d16ef0220214c9692bbaad376e8 (diff)
download gcc-cd6f54736df9dbd2c1c34d07ea25acbdde36e8ca.tar.gz
Predicated comparison folds
[Branch only patch -- not intended for trunk in its current state] Add a couple of combiner patterns for predicated comparisons. I'm not sure that the floating-point ones are 100% correct for -ftrapping-math.
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md 45
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c 63
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c 90
3 files changed, 198 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 534229c1c28..d735359b294 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1289,6 +1289,22 @@
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+;; Integer comparison whose result is ANDed with predicate operand 4.
+;; Operand 1 must match aarch64_simd_imm_minus_one, and the output uses
+;; operand 4 as the governing predicate (%4/z) of a single CMP<cmp_op>.
+;; The first alternative takes an immediate for operand 3, the second a
+;; register.  The pattern also clobbers the CC register.
+(define_insn "*fold_cond_<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one")
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
+ SVE_COND_INT_CMP)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_SVE"
+ "@
+ cmp<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; Predicated floating-point comparison (excluding FCMUO, which doesn't
;; allow #0.0 as an operand).
(define_insn "*vec_fcm<cmp_op><mode>"
@@ -1316,6 +1332,35 @@
"fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+;; Floating-point comparison whose result is ANDed with predicate operand 4.
+;; Operand 1 must match aarch64_simd_imm_minus_one, and the output uses
+;; operand 4 as the governing predicate (%4/z) of a single FCM<cmp_op>.
+;; The first alternative compares against a register, the second against
+;; #0.0.  NOTE(review): the commit message says this may not be fully
+;; correct for -ftrapping-math.
+(define_insn "*cond_<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one")
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "w, Dz")]
+ SVE_COND_FP_CMP)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
+ "TARGET_SVE"
+ "@
+ fcm<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype>
+ fcm<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, #0.0"
+)
+
+;; Unordered floating-point comparison (UNSPEC_COND_UO) of operands 2 and 3
+;; whose result is ANDed with predicate operand 4.  Operand 1 must match
+;; aarch64_simd_imm_minus_one, and the output uses operand 4 as the
+;; governing predicate (%4/z) of a single FCMUO.  Both compared operands
+;; must be registers.
+(define_insn "*cond_unordered<mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one")
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ UNSPEC_COND_UO)
+ (match_operand:<VPRED> 4 "register_operand" "Upl")))]
+ "TARGET_SVE"
+ "fcmuo\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c
new file mode 100644
index 00000000000..51cd4646765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+
+#include <stdint.h>
+
+/* Define maskload_<TYPE> (a, b, n): for each i in [0, n), replace a[i]
+   with b[i] when a[i] < 4 and leave it unchanged otherwise, so b[i] is
+   only needed for elements that pass the comparison.  */
+#define DEF_MASK_LOAD(TYPE)\
+void maskload_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\
+{\
+ for (int i = 0; i < n; i++)\
+ a[i] = a[i] < 4 ? b[i] : a[i];\
+}\
+
+/* Define maskstore_<TYPE> (a, b, n): for each i in [0, n), store b[i]
+   to a[i] only when b[i] is nonzero; other elements of a are not
+   written.  */
+#define DEF_MASK_STORE(TYPE)\
+void maskstore_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\
+{\
+ for (int i = 0; i < n; i++)\
+ if (b[i] != 0)\
+ a[i] = b[i];\
+}\
+
+/* Instantiate the conditional-load and conditional-store loops for every
+   signed and unsigned element width from 8 to 64 bits.  */
+DEF_MASK_LOAD (int8_t)
+DEF_MASK_LOAD (int16_t)
+DEF_MASK_LOAD (int32_t)
+DEF_MASK_LOAD (int64_t)
+DEF_MASK_LOAD (uint8_t)
+DEF_MASK_LOAD (uint16_t)
+DEF_MASK_LOAD (uint32_t)
+DEF_MASK_LOAD (uint64_t)
+
+DEF_MASK_STORE (int8_t)
+DEF_MASK_STORE (int16_t)
+DEF_MASK_STORE (int32_t)
+DEF_MASK_STORE (int64_t)
+DEF_MASK_STORE (uint8_t)
+DEF_MASK_STORE (uint16_t)
+DEF_MASK_STORE (uint32_t)
+DEF_MASK_STORE (uint64_t)
+
+/* No scalar memory accesses. */
+/* { dg-final { scan-assembler-not {[wx][0-9]+, \[} } } */
+
+/* No separate AND instructions: the mask should be folded into the comparison. */
+/* { dg-final { scan-assembler-not {\tand\t} } } */
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7]} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]} 4 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c
new file mode 100644
index 00000000000..22ab0f0e98a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c
@@ -0,0 +1,90 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_mask_ldst_1.c"
+
+#include <stdlib.h>
+
+/* Number of elements processed for TYPE: 73 plus the element size in
+   bytes, so the count differs per element width.  */
+#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE))
+
+/* Declare a_<TYPE> and b_<TYPE> with NUM_ELEMS (TYPE) + 1 elements each.
+   The first NUM_ELEMS entries alternate: a is 4 at even and 0 at odd
+   indices, b is 0 at even and 4 at odd indices.  The extra final slot
+   holds 101 in a and 4 in b; it lies beyond the count passed to the
+   functions under test, so a must still hold 101 afterwards.  */
+#define DEF_INIT_VECTOR(TYPE)\
+ TYPE a_##TYPE[NUM_ELEMS (TYPE) + 1];\
+ TYPE b_##TYPE[NUM_ELEMS (TYPE) + 1];\
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
+ {\
+ a_##TYPE[i] = (i & 0x1) ? 0 : 4;\
+ b_##TYPE[i] = (i & 0x1) ? 4 : 0;\
+ }\
+ a_##TYPE[NUM_ELEMS (TYPE)] = 101;\
+ b_##TYPE[NUM_ELEMS (TYPE)] = 4;
+
+/* Call maskload_<TYPE> on a_<TYPE>/b_<TYPE> and check the outcome.
+   Each of the first NUM_ELEMS entries of a that is not 4 increments the
+   enclosing `result' counter.  If the final slot of a is no longer 101
+   the test aborts immediately.  */
+#define TEST_MASK_LOAD(TYPE)\
+ maskload_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
+ if (a_##TYPE[i] != 4)\
+ {\
+ result++;\
+ }\
+ if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\
+ abort ();
+
+/* Call maskstore_<TYPE> on a_<TYPE>/b_<TYPE> and check the outcome.
+   Each of the first NUM_ELEMS entries of a that is not 4 increments the
+   enclosing `result' counter.  If the final slot of a is no longer 101
+   the test aborts immediately.  */
+#define TEST_MASK_STORE(TYPE)\
+ maskstore_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\
+ if (a_##TYPE[i] != 4)\
+ {\
+ result++;\
+ }\
+ if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\
+ abort ();
+
+/* Run every maskload_<TYPE> and maskstore_<TYPE> variant on freshly
+   initialized arrays.  `result' counts element mismatches recorded by
+   the TEST_* macros; a nonzero count aborts at the end, and the macros
+   themselves abort if the final array slot was modified.  Returns 0 on
+   success.  */
+int __attribute__((optimize(1)))
+main (int argc, char **argv)
+{
+ int result = 0;
+
+ /* Conditional-load checks; this scope has its own a_<TYPE>/b_<TYPE>.  */
+ {
+ DEF_INIT_VECTOR (int8_t);
+ DEF_INIT_VECTOR (int16_t);
+ DEF_INIT_VECTOR (int32_t);
+ DEF_INIT_VECTOR (int64_t);
+ DEF_INIT_VECTOR (uint8_t);
+ DEF_INIT_VECTOR (uint16_t);
+ DEF_INIT_VECTOR (uint32_t);
+ DEF_INIT_VECTOR (uint64_t);
+
+ TEST_MASK_LOAD (int8_t);
+ TEST_MASK_LOAD (int16_t);
+ TEST_MASK_LOAD (int32_t);
+ TEST_MASK_LOAD (int64_t);
+ TEST_MASK_LOAD (uint8_t);
+ TEST_MASK_LOAD (uint16_t);
+ TEST_MASK_LOAD (uint32_t);
+ TEST_MASK_LOAD (uint64_t);
+ }
+
+ /* Conditional-store checks; the arrays are redeclared and reinitialized
+    in a new scope so the load checks above cannot affect them.  */
+ {
+ DEF_INIT_VECTOR (int8_t);
+ DEF_INIT_VECTOR (int16_t);
+ DEF_INIT_VECTOR (int32_t);
+ DEF_INIT_VECTOR (int64_t);
+ DEF_INIT_VECTOR (uint8_t);
+ DEF_INIT_VECTOR (uint16_t);
+ DEF_INIT_VECTOR (uint32_t);
+ DEF_INIT_VECTOR (uint64_t);
+
+ TEST_MASK_STORE (int8_t);
+ TEST_MASK_STORE (int16_t);
+ TEST_MASK_STORE (int32_t);
+ TEST_MASK_STORE (int64_t);
+ TEST_MASK_STORE (uint8_t);
+ TEST_MASK_STORE (uint16_t);
+ TEST_MASK_STORE (uint32_t);
+ TEST_MASK_STORE (uint64_t);
+ }
+
+ /* Any element mismatch recorded above fails the test.  */
+ if (result != 0)
+ abort ();
+
+ return 0;
+}