diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2016-11-30 19:08:27 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-20 16:01:23 +0000 |
commit | cd6f54736df9dbd2c1c34d07ea25acbdde36e8ca (patch) | |
tree | 9bfe0d3ede7d8fc222948b6c00b64c3083cb9d67 | |
parent | 4fd4135ac75a5d16ef0220214c9692bbaad376e8 (diff) | |
download | gcc-cd6f54736df9dbd2c1c34d07ea25acbdde36e8ca.tar.gz |
Predicated comparison folds
[Branch only patch -- not intended for trunk in its current state]
Add a couple of combiner patterns for predicated comparisons.
I'm not sure that the floating-point ones are 100% correct for
-ftrapping-math.
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 45 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c | 90 |
3 files changed, 198 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 534229c1c28..d735359b294 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1289,6 +1289,22 @@ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +(define_insn "*fold_cond_<cmp_op><mode>" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one") + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")] + SVE_COND_INT_CMP) + (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SVE" + "@ + cmp<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, #%3 + cmp<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype>" +) + ;; Predicated floating-point comparison (excluding FCMUO, which doesn't ;; allow #0.0 as an operand). (define_insn "*vec_fcm<cmp_op><mode>" @@ -1316,6 +1332,35 @@ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +;; Floating-point comparison folded with an AND of a second predicate. 
+(define_insn "*cond_<cmp_op><mode>" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one") + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "w, Dz")] + SVE_COND_FP_CMP) + (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] + "TARGET_SVE" + "@ + fcm<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype> + fcm<cmp_op>\t%0.<Vetype>, %4/z, %2.<Vetype>, #0.0" +) + +(define_insn "*cond_unordered<mode>" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") + (and:<VPRED> + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_simd_imm_minus_one") + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] + UNSPEC_COND_UO) + (match_operand:<VPRED> 4 "register_operand" "Upl")))] + "TARGET_SVE" + "fcmuo\t%0.<Vetype>, %4/z, %2.<Vetype>, %3.<Vetype>" +) + ;; vcond_mask operand order: true, false, mask ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) ;; SEL operand order: mask, true, false diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c new file mode 100644 index 00000000000..51cd4646765 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include <stdint.h> + +#define DEF_MASK_LOAD(TYPE)\ +void maskload_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\ +{\ + for (int i = 0; i < n; i++)\ + a[i] = a[i] < 4 ? 
b[i] : a[i];\ +}\ + +#define DEF_MASK_STORE(TYPE)\ +void maskstore_##TYPE (TYPE *__restrict__ a, TYPE *__restrict__ b, int n)\ +{\ + for (int i = 0; i < n; i++)\ + if (b[i] != 0)\ + a[i] = b[i];\ +}\ + +DEF_MASK_LOAD (int8_t) +DEF_MASK_LOAD (int16_t) +DEF_MASK_LOAD (int32_t) +DEF_MASK_LOAD (int64_t) +DEF_MASK_LOAD (uint8_t) +DEF_MASK_LOAD (uint16_t) +DEF_MASK_LOAD (uint32_t) +DEF_MASK_LOAD (uint64_t) + +DEF_MASK_STORE (int8_t) +DEF_MASK_STORE (int16_t) +DEF_MASK_STORE (int32_t) +DEF_MASK_STORE (int64_t) +DEF_MASK_STORE (uint8_t) +DEF_MASK_STORE (uint16_t) +DEF_MASK_STORE (uint32_t) +DEF_MASK_STORE (uint64_t) + +/* No scalar memory accesses. */ +/* { dg-final { scan-assembler-not {[wx][0-9]+, \[} } } */ + +/* No separate AND instructions (they should fold into the comparisons). */ +/* { dg-final { scan-assembler-not {\tand\t} } } */ + +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z} 6 } } */ + +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmpls\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */ + +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.b, p[0-7]/z, z[0-9]+\.b, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, #3} 1 } } */ +/* { dg-final { scan-assembler-times {\tcmple\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, #3} 1 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times 
{\tst1h\tz[0-9]+\.h, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]} 4 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c new file mode 100644 index 00000000000..22ab0f0e98a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_mask_ldst_1_run.c @@ -0,0 +1,90 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */ + +#include "sve_mask_ldst_1.c" + +#include <stdlib.h> + +#define NUM_ELEMS(TYPE) (73 + sizeof (TYPE)) + +#define DEF_INIT_VECTOR(TYPE)\ + TYPE a_##TYPE[NUM_ELEMS (TYPE) + 1];\ + TYPE b_##TYPE[NUM_ELEMS (TYPE) + 1];\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + {\ + a_##TYPE[i] = (i & 0x1) ? 0 : 4;\ + b_##TYPE[i] = (i & 0x1) ? 4 : 0;\ + }\ + a_##TYPE[NUM_ELEMS (TYPE)] = 101;\ + b_##TYPE[NUM_ELEMS (TYPE)] = 4; + +#define TEST_MASK_LOAD(TYPE)\ + maskload_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + if (a_##TYPE[i] != 4)\ + {\ + result++;\ + }\ + if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\ + abort (); + +#define TEST_MASK_STORE(TYPE)\ + maskstore_##TYPE (a_##TYPE, b_##TYPE, NUM_ELEMS (TYPE));\ + for (int i = 0; i < NUM_ELEMS (TYPE); i++ )\ + if (a_##TYPE[i] != 4)\ + {\ + result++;\ + }\ + if (a_##TYPE[NUM_ELEMS (TYPE)] != 101)\ + abort (); + +int __attribute__((optimize(1))) +main (int argc, char **argv) +{ + int result = 0; + + { + DEF_INIT_VECTOR (int8_t); + DEF_INIT_VECTOR (int16_t); + DEF_INIT_VECTOR (int32_t); + DEF_INIT_VECTOR (int64_t); + DEF_INIT_VECTOR (uint8_t); + DEF_INIT_VECTOR (uint16_t); + DEF_INIT_VECTOR (uint32_t); + DEF_INIT_VECTOR (uint64_t); + + TEST_MASK_LOAD (int8_t); + TEST_MASK_LOAD (int16_t); + TEST_MASK_LOAD (int32_t); + TEST_MASK_LOAD (int64_t); + TEST_MASK_LOAD (uint8_t); + TEST_MASK_LOAD (uint16_t); + TEST_MASK_LOAD 
(uint32_t); + TEST_MASK_LOAD (uint64_t); + } + + { + DEF_INIT_VECTOR (int8_t); + DEF_INIT_VECTOR (int16_t); + DEF_INIT_VECTOR (int32_t); + DEF_INIT_VECTOR (int64_t); + DEF_INIT_VECTOR (uint8_t); + DEF_INIT_VECTOR (uint16_t); + DEF_INIT_VECTOR (uint32_t); + DEF_INIT_VECTOR (uint64_t); + + TEST_MASK_STORE (int8_t); + TEST_MASK_STORE (int16_t); + TEST_MASK_STORE (int32_t); + TEST_MASK_STORE (int64_t); + TEST_MASK_STORE (uint8_t); + TEST_MASK_STORE (uint16_t); + TEST_MASK_STORE (uint32_t); + TEST_MASK_STORE (uint64_t); + } + + if (result != 0) + abort (); + + return 0; +} |