diff options
-rw-r--r-- | gcc/ChangeLog | 27 | ||||
-rw-r--r-- | gcc/fold-const.c | 4 | ||||
-rw-r--r-- | gcc/optabs-query.c | 90 | ||||
-rw-r--r-- | gcc/optabs-query.h | 4 | ||||
-rw-r--r-- | gcc/tree-ssa-forwprop.c | 2 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.c | 38 | ||||
-rw-r--r-- | gcc/tree-vect-generic.c | 4 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 5 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 4 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 10 |
10 files changed, 121 insertions, 67 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index be819e2a71c..61fddf233d1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,32 @@ 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> + * optabs-query.h (can_vec_perm_p): Delete. + (can_vec_perm_var_p, can_vec_perm_const_p): Declare. + * optabs-query.c (can_vec_perm_p): Split into... + (can_vec_perm_var_p, can_vec_perm_const_p): ...these two functions. + (can_mult_highpart_p): Use can_vec_perm_const_p to test whether a + particular selector is valid. + * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. + * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. + (vect_grouped_load_supported): Likewise. + (vect_shift_permute_load_chain): Likewise. + * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. + (vect_transform_slp_perm_load): Likewise. + * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. + (vectorizable_bswap): Likewise. + (vect_gen_perm_mask_checked): Likewise. + * fold-const.c (fold_ternary_loc): Likewise. Don't take + implementations of variable permutation vectors into account + when deciding which selector to use. + * tree-vect-loop.c (have_whole_vector_shift): Don't check whether + vec_perm_const_optab is supported; instead use can_vec_perm_const_p + with a false third argument. + * tree-vect-generic.c (lower_vec_perm): Use can_vec_perm_const_p + to test whether the constant selector is valid and can_vec_perm_var_p + to test whether a variable selector is valid. + +2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> + * optabs-query.h (can_vec_perm_p): Take a const vec_perm_indices *. * optabs-query.c (can_vec_perm_p): Likewise. 
* fold-const.c (fold_vec_perm): Take a const vec_perm_indices & diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 224c0854bdd..d9430ab473b 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -11778,8 +11778,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, argument permutation while still allowing an equivalent 2-argument version. */ if (need_mask_canon && arg2 == op2 - && !can_vec_perm_p (TYPE_MODE (type), false, &sel) - && can_vec_perm_p (TYPE_MODE (type), false, &sel2)) + && !can_vec_perm_const_p (TYPE_MODE (type), sel, false) + && can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) { need_mask_canon = need_mask_canon2; sel = sel2; diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index b8e7e314b0f..90925908d67 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -361,58 +361,86 @@ qimode_for_vec_perm (machine_mode mode) return opt_machine_mode (); } -/* Return true if VEC_PERM_EXPR of arbitrary input vectors can be - expanded using SIMD extensions of the CPU. SEL may be NULL, which - stands for an unknown constant. Note that additional permutations - representing whole-vector shifts may also be handled via the vec_shr - optab, but only where the second input vector is entirely constant - zeroes; this case is not dealt with here. */ +/* Return true if VEC_PERM_EXPRs with variable selector operands can be + expanded using SIMD extensions of the CPU. MODE is the mode of the + vectors being permuted. */ bool -can_vec_perm_p (machine_mode mode, bool variable, const vec_perm_indices *sel) +can_vec_perm_var_p (machine_mode mode) { - machine_mode qimode; - /* If the target doesn't implement a vector mode for the vector type, then no operations are supported. 
*/ if (!VECTOR_MODE_P (mode)) return false; - if (!variable) - { - if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing - && (sel == NULL - || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, *sel))) - return true; - } - if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing) return true; /* We allow fallback to a QI vector mode, and adjust the mask. */ + machine_mode qimode; if (!qimode_for_vec_perm (mode).exists (&qimode)) return false; - /* ??? For completeness, we ought to check the QImode version of - vec_perm_const_optab. But all users of this implicit lowering - feature implement the variable vec_perm_optab. */ if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing) return false; /* In order to support the lowering of variable permutations, we need to support shifts and adds. */ - if (variable) + if (GET_MODE_UNIT_SIZE (mode) > 2 + && optab_handler (ashl_optab, mode) == CODE_FOR_nothing + && optab_handler (vashl_optab, mode) == CODE_FOR_nothing) + return false; + if (optab_handler (add_optab, qimode) == CODE_FOR_nothing) + return false; + + return true; +} + +/* Return true if the target directly supports VEC_PERM_EXPRs on vectors + of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it + is acceptable to force the selector into a register and use a variable + permute (if the target supports that). + + Note that additional permutations representing whole-vector shifts may + also be handled via the vec_shr optab, but only where the second input + vector is entirely constant zeroes; this case is not dealt with here. */ + +bool +can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, + bool allow_variable_p) +{ + /* If the target doesn't implement a vector mode for the vector type, + then no operations are supported. */ + if (!VECTOR_MODE_P (mode)) + return false; + + /* It's probably cheaper to test for the variable case first. 
*/ + if (allow_variable_p) + { + if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing) + return true; + + /* Unlike can_vec_perm_var_p, we don't need to test for optabs + related to computing the QImode selector, since that happens at + compile time. */ + machine_mode qimode; + if (qimode_for_vec_perm (mode).exists (&qimode) + && direct_optab_handler (vec_perm_optab, qimode) != CODE_FOR_nothing) + return true; + } + + if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing) { - if (GET_MODE_UNIT_SIZE (mode) > 2 - && optab_handler (ashl_optab, mode) == CODE_FOR_nothing - && optab_handler (vashl_optab, mode) == CODE_FOR_nothing) - return false; - if (optab_handler (add_optab, qimode) == CODE_FOR_nothing) - return false; + if (targetm.vectorize.vec_perm_const_ok == NULL + || targetm.vectorize.vec_perm_const_ok (mode, sel)) + return true; + + /* ??? For completeness, we ought to check the QImode version of + vec_perm_const_optab. But all users of this implicit lowering + feature implement the variable vec_perm_optab. */ } - return true; + return false; } /* Find a widening optab even if it doesn't widen as much as we want. @@ -472,7 +500,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0)); - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return 2; } } @@ -486,7 +514,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 
0 : 1)); - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return 3; } } diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 392429645cd..28f20e7d0fd 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -175,7 +175,9 @@ enum insn_code can_float_p (machine_mode, machine_mode, int); enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); opt_machine_mode qimode_for_vec_perm (machine_mode); -bool can_vec_perm_p (machine_mode, bool, const vec_perm_indices *); +bool can_vec_perm_var_p (machine_mode); +bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &, + bool = true); /* Find a widening optab even if it doesn't widen as much as we want. */ #define find_widening_optab_handler(A, B, C) \ find_widening_optab_handler_and_mode (A, B, C, NULL) diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 41ee4098950..783ceb689b8 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2108,7 +2108,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { tree mask_type; - if (!can_vec_perm_p (TYPE_MODE (type), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (type), sel)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 014862a5b1b..bbbeef6f816 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -4600,11 +4600,11 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = 0; } - if (!can_vec_perm_p (mode, false, &sel)) + if (!can_vec_perm_const_p (mode, sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, - "permutaion op not supported by target.\n"); + "permutation op not supported by target.\n"); return false; } @@ -4617,11 +4617,11 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) 
if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = nelt + j2++; } - if (!can_vec_perm_p (mode, false, &sel)) + if (!can_vec_perm_const_p (mode, sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, - "permutaion op not supported by target.\n"); + "permutation op not supported by target.\n"); return false; } } @@ -4637,11 +4637,11 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[i * 2] = i; sel[i * 2 + 1] = i + nelt; } - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) { for (i = 0; i < nelt; i++) sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return true; } } @@ -5179,7 +5179,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = 3 * i + k; else sel[i] = 0; - if (!can_vec_perm_p (mode, false, &sel)) + if (!can_vec_perm_const_p (mode, sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5192,7 +5192,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = i; else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); - if (!can_vec_perm_p (mode, false, &sel)) + if (!can_vec_perm_const_p (mode, sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5209,11 +5209,11 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, gcc_assert (pow2p_hwi (count)); for (i = 0; i < nelt; i++) sel[i] = i * 2; - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) { for (i = 0; i < nelt; i++) sel[i] = i * 2 + 1; - if (can_vec_perm_p (mode, false, &sel)) + if (can_vec_perm_const_p (mode, sel)) return true; } } @@ -5540,7 +5540,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i * 2; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) 
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5554,7 +5554,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i * 2 + 1; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5568,7 +5568,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {4 5 6 7 8 9 10 11}. */ for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5583,7 +5583,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = i; for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5646,7 +5646,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, sel[i] = 3 * k + (l % 3); k++; } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5660,7 +5660,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {6 7 8 9 10 11 12 13}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5673,7 +5673,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. 
*/ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5686,7 +5686,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {3 4 5 6 7 8 9 10}. */ for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5699,7 +5699,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index c2258142eb4..7e78df8b5dd 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1306,7 +1306,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi) sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) & (2 * elements - 1)); - if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int)) + if (can_vec_perm_const_p (TYPE_MODE (vect_type), sel_int)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); @@ -1337,7 +1337,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi) } } } - else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL)) + else if (can_vec_perm_var_p (TYPE_MODE (vect_type))) return; warning_at (loc, OPT_Wvector_operation_performance, diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 810fa5f3ce9..02f6f7f2c76 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3729,9 +3729,6 @@ have_whole_vector_shift (machine_mode mode) 
if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) return true; - if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing) - return false; - unsigned int i, nelt = GET_MODE_NUNITS (mode); auto_vec_perm_indices sel (nelt); @@ -3739,7 +3736,7 @@ have_whole_vector_shift (machine_mode mode) { sel.truncate (0); calc_vec_perm_mask_for_shift (i, nelt, &sel); - if (!can_vec_perm_p (mode, false, &sel)) + if (!can_vec_perm_const_p (mode, sel, false)) return false; } return true; diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 0ca42b4d12d..0f6005338df 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -901,7 +901,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, elt += count; sel.quick_push (elt); } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) { for (i = 0; i < group_size; ++i) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) @@ -3646,7 +3646,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, if (index == nunits) { if (! noop_p - && ! can_vec_perm_p (mode, false, &mask)) + && ! can_vec_perm_const_p (mode, mask)) { if (dump_enabled_p ()) { diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b0df11b930f..85167d3123a 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1720,7 +1720,7 @@ perm_mask_for_reverse (tree vectype) for (i = 0; i < nunits; ++i) sel.quick_push (nunits - 1 - i); - if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), sel)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, sel); } @@ -2502,7 +2502,7 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, for (unsigned j = 0; j < word_bytes; ++j) elts.quick_push ((i + 1) * word_bytes - j - 1); - if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts)) + if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), elts)) return false; if (! 
vec_stmt) @@ -6519,7 +6519,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Given a vector type VECTYPE, turns permutation SEL into the equivalent VECTOR_CST mask. No checks are made that the target platform supports the - mask, so callers may wish to test can_vec_perm_p separately, or use + mask, so callers may wish to test can_vec_perm_const_p separately, or use vect_gen_perm_mask_checked. */ tree @@ -6540,13 +6540,13 @@ vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) return mask_elts.build (); } -/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p, +/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p, i.e. that the target supports the pattern _for arbitrary input vectors_. */ tree vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) { - gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel)); + gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel)); return vect_gen_perm_mask_any (vectype, sel); } |