summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorienkovich <ienkovich@138bc75d-0d04-0410-961f-82ee72b054a4>2016-06-22 14:05:55 +0000
committerienkovich <ienkovich@138bc75d-0d04-0410-961f-82ee72b054a4>2016-06-22 14:05:55 +0000
commit82ccd33e92988687e82c9540e6d0f364740cdbda (patch)
treec1a0b13225e17bbb58b4370baf22278fd1c0ffec
parent25fe98a62764a2764ac002510b107a7e17ed7f06 (diff)
downloadgcc-82ccd33e92988687e82c9540e6d0f364740cdbda.tar.gz
gcc/
PR middle-end/71488 * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support comparison of boolean vectors. * tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison of boolean vectors using bitwise operations. gcc/testsuite/ PR middle-end/71488 * g++.dg/pr71488.C: New test. * gcc.dg/vect/vect-bool-cmp.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237706 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/g++.dg/pr71488.C24
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c252
-rw-r--r--gcc/tree-vect-patterns.c3
-rw-r--r--gcc/tree-vect-stmts.c96
6 files changed, 383 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e6e39121912..047f0594b84 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
+
+ PR middle-end/71488
+ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Support
+ comparison of boolean vectors.
+ * tree-vect-stmts.c (vectorizable_comparison): Vectorize comparison
+ of boolean vectors using bitwise operations.
+
2016-06-22 Andreas Schwab <schwab@suse.de>
* config/aarch64/aarch64-protos.h (aarch64_elf_asm_named_section):
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 62fdac4b381..662a15ba189 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-06-22 Ilya Enkovich <ilya.enkovich@intel.com>
+
+ PR middle-end/71488
+ * g++.dg/pr71488.C: New test.
+ * gcc.dg/vect/vect-bool-cmp.c: New test.
+
2016-06-22 Eric Botcazou <ebotcazou@adacore.com>
* gcc.dg/guality/param-5.c: New test.
diff --git a/gcc/testsuite/g++.dg/pr71488.C b/gcc/testsuite/g++.dg/pr71488.C
new file mode 100644
index 00000000000..d7d657ed06e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr71488.C
@@ -0,0 +1,24 @@
+// PR middle-end/71488
+// { dg-do run }
+// { dg-options "-O3 -std=c++11" }
+// { dg-additional-options "-march=westmere" { target i?86-*-* x86_64-*-* } }
+// { dg-require-effective-target c++11 }
+
+#include <valarray>
+
+int var_4 = 1;
+long long var_9 = 0;
+
+int main() {
+
+ std::valarray<std::valarray<long long>> v10;
+
+ v10.resize(1);
+ v10[0].resize(4);
+
+ for (int i = 0; i < 4; i++)
+ v10[0][i] = ((var_9 == 0) > unsigned (var_4 == 0)) + (var_9 == 0);
+
+ if (v10[0][0] != 2)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c
new file mode 100644
index 00000000000..a1e2a24accd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c
@@ -0,0 +1,252 @@
+/* PR71488 */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
+
+int i1, i2;
+
+void __attribute__((noclone,noinline))
+fn1 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn2 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn3 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) > (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn4 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn5 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn6 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) >= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn7 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn8 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn9 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) < (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn10 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn11 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn12 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) <= (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn13 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn14 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn15 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) == (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn16 (int * __restrict__ p1, int * __restrict__ p2, int * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn17 (int * __restrict__ p1, int * __restrict__ p2, short * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+void __attribute__((noclone,noinline))
+fn18 (int * __restrict__ p1, int * __restrict__ p2, long long * __restrict__ p3, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ p1[i] = ((p2[i] == 0) != (unsigned)(p3[i] == 0)) + (p2[i] == 0);
+}
+
+int eq (int i1, int i2) { return i1 == i2; }
+int ne (int i1, int i2) { return i1 != i2; }
+int lt (int i1, int i2) { return i1 < i2; }
+int le (int i1, int i2) { return i1 <= i2; }
+int gt (int i1, int i2) { return i1 > i2; }
+int ge (int i1, int i2) { return i1 >= i2; }
+
+typedef int (*cmp_fn)(int, int);
+
+void
+check (int *p, cmp_fn fn)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ int t1 = ((i % 4) > 1) == 0;
+ int t2 = (i % 2) == 0;
+ int res = fn (t1, t2) + t1;
+ if (p[i] != res)
+ __builtin_abort ();
+ }
+}
+
+int
+main (int argc, char **argv)
+{
+ int i1[32], i2[32], res[32];
+ short s2[32];
+ long long l2[32];
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ l2[i] = i2[i] = s2[i] = i % 2;
+ i1[i] = (i % 4) > 1;
+ asm ("":::"memory");
+ }
+
+ fn1 (res, i1, i2, 32);
+ check (res, gt);
+ fn2 (res, i1, s2, 32);
+ check (res, gt);
+ fn3 (res, i1, l2, 32);
+ check (res, gt);
+
+ fn4 (res, i1, i2, 32);
+ check (res, ge);
+ fn5 (res, i1, s2, 32);
+ check (res, ge);
+ fn6 (res, i1, l2, 32);
+ check (res, ge);
+
+ fn7 (res, i1, i2, 32);
+ check (res, lt);
+ fn8 (res, i1, s2, 32);
+ check (res, lt);
+ fn9 (res, i1, l2, 32);
+ check (res, lt);
+
+ fn10 (res, i1, i2, 32);
+ check (res, le);
+ fn11 (res, i1, s2, 32);
+ check (res, le);
+ fn12 (res, i1, l2, 32);
+ check (res, le);
+
+ fn13 (res, i1, i2, 32);
+ check (res, eq);
+ fn14 (res, i1, s2, 32);
+ check (res, eq);
+ fn15 (res, i1, l2, 32);
+ check (res, eq);
+
+ fn16 (res, i1, i2, 32);
+ check (res, ne);
+ fn17 (res, i1, s2, 32);
+ check (res, ne);
+ fn18 (res, i1, l2, 32);
+ check (res, ne);
+}
+
+/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 8a2221f9350..f0c515daaa2 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -3763,7 +3763,8 @@ vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
if (rhs_code != BIT_IOR_EXPR
&& rhs_code != BIT_XOR_EXPR
- && rhs_code != BIT_AND_EXPR)
+ && rhs_code != BIT_AND_EXPR
+ && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
return NULL;
rhs2 = gimple_assign_rhs2 (last_stmt);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index c74f14f0205..5c655029dd5 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7756,7 +7756,7 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
unsigned nunits;
int ncopies;
- enum tree_code code;
+ enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
stmt_vec_info prev_stmt_info = NULL;
int i, j;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
@@ -7829,11 +7829,74 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
return false;
+ /* Can't compare mask and non-mask types. */
+ if (vectype1 && vectype2
+ && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
+ return false;
+
+ /* Boolean values may have another representation in vectors
+ and therefore we prefer bit operations over comparison for
+ them (which also works for scalar masks). We store opcodes
+ to use in bitop1 and bitop2. Statement is vectorized as
+ BITOP2 (rhs1 BITOP1 rhs2) or
+ rhs1 BITOP2 (BITOP1 rhs2)
+ depending on bitop1 and bitop2 arity. */
+ if (VECTOR_BOOLEAN_TYPE_P (vectype))
+ {
+ if (code == GT_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_AND_EXPR;
+ }
+ else if (code == GE_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_IOR_EXPR;
+ }
+ else if (code == LT_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_AND_EXPR;
+ std::swap (rhs1, rhs2);
+ }
+ else if (code == LE_EXPR)
+ {
+ bitop1 = BIT_NOT_EXPR;
+ bitop2 = BIT_IOR_EXPR;
+ std::swap (rhs1, rhs2);
+ }
+ else
+ {
+ bitop1 = BIT_XOR_EXPR;
+ if (code == EQ_EXPR)
+ bitop2 = BIT_NOT_EXPR;
+ }
+ }
+
if (!vec_stmt)
{
STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
- return expand_vec_cmp_expr_p (vectype, mask_type);
+ vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+ dts, NULL, NULL);
+ if (bitop1 == NOP_EXPR)
+ return expand_vec_cmp_expr_p (vectype, mask_type);
+ else
+ {
+ machine_mode mode = TYPE_MODE (vectype);
+ optab optab;
+
+ optab = optab_for_tree_code (bitop1, vectype, optab_default);
+ if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
+ return false;
+
+ if (bitop2 != NOP_EXPR)
+ {
+ optab = optab_for_tree_code (bitop2, vectype, optab_default);
+ if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
+ return false;
+ }
+ return true;
+ }
}
/* Transform. */
@@ -7890,8 +7953,31 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
vec_rhs2 = vec_oprnds1[i];
new_temp = make_ssa_name (mask);
- new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (bitop1 == NOP_EXPR)
+ {
+ new_stmt = gimple_build_assign (new_temp, code,
+ vec_rhs1, vec_rhs2);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ else
+ {
+ if (bitop1 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
+ else
+ new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
+ vec_rhs2);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (bitop2 != NOP_EXPR)
+ {
+ tree res = make_ssa_name (mask);
+ if (bitop2 == BIT_NOT_EXPR)
+ new_stmt = gimple_build_assign (res, bitop2, new_temp);
+ else
+ new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
+ new_temp);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ }
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}