diff options
author | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-07-10 08:25:20 +0000 |
---|---|---|
committer | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-07-10 08:25:20 +0000 |
commit | ebf4f7640c6e22a7a4e84be1d1a67f194e9517a3 (patch) | |
tree | 92ca34cfac26d6253011a1acbd60501bca8cdf7a | |
parent | 64cd118d7079bfbd29c1b37f18f61a7306bfc3c5 (diff) | |
download | gcc-ebf4f7640c6e22a7a4e84be1d1a67f194e9517a3.tar.gz |
Move vector highpart emulation to the optabs layer
* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
* optabs.c (can_mult_highpart_p): New.
(expand_mult_highpart): New.
* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
* tree-vect-generic.c (expand_vector_operations_1): Don't expand
by pieces if can_mult_highpart_p.
(expand_vector_divmod): Use can_mult_highpart_p and always
generate MULT_HIGHPART_EXPR.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
* tree-vect-stmts.c (vectorizable_operation): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@189407 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/expmed.c | 32 | ||||
-rw-r--r-- | gcc/expr.c | 7 | ||||
-rw-r--r-- | gcc/optabs.c | 126 | ||||
-rw-r--r-- | gcc/optabs.h | 6 | ||||
-rw-r--r-- | gcc/tree-vect-generic.c | 113 | ||||
-rw-r--r-- | gcc/tree-vect-patterns.c | 23 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 171 |
8 files changed, 204 insertions, 286 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 89f8a7d3216..5ff4d81de5c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,17 @@ 2012-07-10 Richard Henderson <rth@redhat.com> + * expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart. + (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab. + * optabs.c (can_mult_highpart_p): New. + (expand_mult_highpart): New. + * expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR): Use it. + * tree-vect-generic.c (expand_vector_operations_1): Don't expand + by pieces if can_mult_highpart_p. + (expand_vector_divmod): Use can_mult_highpart_p and always + generate MULT_HIGHPART_EXPR. + * tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise. + * tree-vect-stmts.c (vectorizable_operation): Likewise. + * config/spu/spu-builtins.md (spu_mpy): Move to spu.md. (spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise. * config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy. diff --git a/gcc/expmed.c b/gcc/expmed.c index cec8d23da1a..4101f613f82 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, const struct algorithm *, enum mult_variant); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static rtx extract_high_half (enum machine_mode, rtx); -static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); -static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, +static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); +static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, int, int); /* Compute and return the best algorithm for multiplying by T. The algorithm must cost less than cost_limit @@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, return target; } -/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ +/* Subroutine of expmed_mult_highpart. 
Return the MODE high part of OP. */ static rtx extract_high_half (enum machine_mode mode, rtx op) @@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op) return convert_modes (mode, wider_mode, op, 0); } -/* Like expand_mult_highpart, but only consider using a multiplication +/* Like expmed_mult_highpart, but only consider using a multiplication optab. OP1 is an rtx for the constant operand. */ static rtx -expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); @@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, MAX_COST is the total allowed cost for the expanded RTL. */ static rtx -expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, +expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp, int max_cost) { enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); @@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, mode == word_mode, however all the cost calculations in synth_mult etc. assume single-word operations. */ if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; @@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, { /* See whether the specialized multiplication optabs are cheaper than the shift/add version. 
*/ - tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, + tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, alg.cost.cost + extra_cost); if (tem) return tem; @@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, return tem; } - return expand_mult_highpart_optab (mode, op0, op1, target, + return expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, max_cost); } @@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, In all cases but EXACT_DIV_EXPR, this multiplication requires the upper half of the product. Different strategies for generating the product are - implemented in expand_mult_highpart. + implemented in expmed_mult_highpart. If what we actually want is the remainder, we generate that by another by-constant multiplication and a subtraction. */ @@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, mode for which we can do the operation with a library call. */ /* We might want to refine this now that we have division-by-constant - optimization. Since expand_mult_highpart tries so many variants, it is + optimization. Since expmed_mult_highpart tries so many variants, it is not straightforward to generalize this. Maybe we should make an array of possible modes in init_expmed? Save this for GCC 2.7. 
*/ @@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, = (shift_cost[speed][compute_mode][post_shift - 1] + shift_cost[speed][compute_mode][1] + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][pre_shift] + shift_cost[speed][compute_mode][post_shift]); - t2 = expand_mult_highpart (compute_mode, t1, + t2 = expmed_mult_highpart (compute_mode, t1, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); @@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][post_shift] + shift_cost[speed][compute_mode][size - 1] + add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, + t1 = expmed_mult_highpart (compute_mode, op0, GEN_INT (ml), NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][post_shift] + shift_cost[speed][compute_mode][size - 1] + 2 * add_cost[speed][compute_mode]); - t1 = expand_mult_highpart (compute_mode, op0, mlr, + t1 = expmed_mult_highpart (compute_mode, op0, mlr, NULL_RTX, 0, max_cost - extra_cost); if (t1 == 0) @@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, extra_cost = (shift_cost[speed][compute_mode][post_shift] + shift_cost[speed][compute_mode][size - 1] + 2 * add_cost[speed][compute_mode]); - t3 = expand_mult_highpart (compute_mode, t2, + t3 = expmed_mult_highpart (compute_mode, t2, GEN_INT (ml), NULL_RTX, 1, max_cost - extra_cost); if (t3 != 0) diff --git a/gcc/expr.c b/gcc/expr.c index c56b0e517a1..916dee09fe4 100644 --- a/gcc/expr.c +++ 
b/gcc/expr.c @@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, return expand_divmod (0, code, mode, op0, op1, target, unsignedp); case RDIV_EXPR: - case MULT_HIGHPART_EXPR: goto binop; + case MULT_HIGHPART_EXPR: + expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL); + temp = expand_mult_highpart (mode, op0, op1, target, unsignedp); + gcc_assert (temp); + return temp; + case TRUNC_MOD_EXPR: case FLOOR_MOD_EXPR: case CEIL_MOD_EXPR: diff --git a/gcc/optabs.c b/gcc/optabs.c index fbea87976f4..e1ecc657484 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, return ops[0].value; } +/* Return non-zero if a highpart multiply is supported of can be synthisized. + For the benefit of expand_mult_highpart, the return value is 1 for direct, + 2 for even/odd widening, and 3 for hi/lo widening. */ + +int +can_mult_highpart_p (enum machine_mode mode, bool uns_p) +{ + optab op; + unsigned char *sel; + unsigned i, nunits; + + op = uns_p ? umul_highpart_optab : smul_highpart_optab; + if (optab_handler (op, mode) != CODE_FOR_nothing) + return 1; + + /* If the mode is an integral vector, synth from widening operations. */ + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return 0; + + nunits = GET_MODE_NUNITS (mode); + sel = XALLOCAVEC (unsigned char, nunits); + + op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; + if (optab_handler (op, mode) != CODE_FOR_nothing) + { + op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; + if (optab_handler (op, mode) != CODE_FOR_nothing) + { + for (i = 0; i < nunits; ++i) + sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0); + if (can_vec_perm_p (mode, false, sel)) + return 2; + } + } + + op = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab; + if (optab_handler (op, mode) != CODE_FOR_nothing) + { + op = uns_p ? 
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + if (optab_handler (op, mode) != CODE_FOR_nothing) + { + for (i = 0; i < nunits; ++i) + sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); + if (can_vec_perm_p (mode, false, sel)) + return 3; + } + } + + return 0; +} + +/* Expand a highpart multiply. */ + +rtx +expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, + rtx target, bool uns_p) +{ + struct expand_operand eops[3]; + enum insn_code icode; + int method, i, nunits; + enum machine_mode wmode; + rtx m1, m2, perm; + optab tab1, tab2; + rtvec v; + + method = can_mult_highpart_p (mode, uns_p); + switch (method) + { + case 0: + return NULL_RTX; + case 1: + tab1 = uns_p ? umul_highpart_optab : smul_highpart_optab; + return expand_binop (mode, tab1, op0, op1, target, uns_p, + OPTAB_LIB_WIDEN); + case 2: + tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; + tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; + break; + case 3: + tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + tab2 = uns_p ? 
vec_widen_umult_hi_optab : vec_widen_smult_hi_optab; + if (BYTES_BIG_ENDIAN) + { + optab t = tab1; + tab1 = tab2; + tab2 = t; + } + break; + default: + gcc_unreachable (); + } + + icode = optab_handler (tab1, mode); + nunits = GET_MODE_NUNITS (mode); + wmode = insn_data[icode].operand[0].mode; + gcc_checking_assert (2 * GET_MODE_NUNITS (wmode) == nunits); + gcc_checking_assert (GET_MODE_SIZE (wmode) == GET_MODE_SIZE (mode)); + + create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode); + create_input_operand (&eops[1], op0, mode); + create_input_operand (&eops[2], op1, mode); + expand_insn (icode, 3, eops); + m1 = gen_lowpart (mode, eops[0].value); + + create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode); + create_input_operand (&eops[1], op0, mode); + create_input_operand (&eops[2], op1, mode); + expand_insn (optab_handler (tab2, mode), 3, eops); + m2 = gen_lowpart (mode, eops[0].value); + + v = rtvec_alloc (nunits); + if (method == 2) + { + for (i = 0; i < nunits; ++i) + RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1) + + ((i & 1) ? nunits : 0)); + } + else + { + for (i = 0; i < nunits; ++i) + RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); + } + perm = gen_rtx_CONST_VECTOR (mode, v); + + return expand_vec_perm (mode, m1, m2, perm, target); +} /* Return true if there is a compare_and_swap pattern. */ diff --git a/gcc/optabs.h b/gcc/optabs.h index 37a6bfd9415..8b04e17e7b3 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *); /* Generate code for VEC_PERM_EXPR. */ extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx); +/* Return non-zero if target supports a given highpart multiplication. */ +extern int can_mult_highpart_p (enum machine_mode, bool); + +/* Generate code for MULT_HIGHPART_EXPR. 
*/ +extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool); + /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. */ diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index e37c631becf..b0f41d123d7 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, int dummy_int; unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type)); unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); - optab op; tree *vec; - unsigned char *sel = NULL; - tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o; + tree cur_op, mulcst, tem; + optab op; if (prec > HOST_BITS_PER_WIDE_INT) return NULL_TREE; @@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) return NULL_TREE; - op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default); - if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) - wider_type = decl_e = decl_o = NULL_TREE; - else - { - wider_type = build_nonstandard_integer_type (prec * 2, unsignedp), - wider_type = build_vector_type (wider_type, nunits / 2); - if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT - || GET_MODE_BITSIZE (TYPE_MODE (wider_type)) - != GET_MODE_BITSIZE (TYPE_MODE (type))) - return NULL_TREE; - - sel = XALLOCAVEC (unsigned char, nunits); - - if (targetm.vectorize.builtin_mul_widen_even - && targetm.vectorize.builtin_mul_widen_odd - && (decl_e = targetm.vectorize.builtin_mul_widen_even (type)) - && (decl_o = targetm.vectorize.builtin_mul_widen_odd (type)) - && (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e))) - == TYPE_MODE (wider_type))) - { - for (i = 0; i < nunits; i++) - sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? 
nunits : 0); - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) - decl_e = decl_o = NULL_TREE; - } - else - decl_e = decl_o = NULL_TREE; - - if (decl_e == NULL_TREE) - { - op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, - type, optab_default); - if (op == NULL - || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) - return NULL_TREE; - op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, - type, optab_default); - if (op == NULL - || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) - return NULL_TREE; - - for (i = 0; i < nunits; i++) - sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) - return NULL_TREE; - } - } + if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type))) + return NULL_TREE; cur_op = op0; @@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, for (i = 0; i < nunits; i++) vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); mulcst = build_vector (type, vec); - if (wider_type == NULL_TREE) - cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); - else - { - for (i = 0; i < nunits; i++) - vec[i] = build_int_cst (TREE_TYPE (type), sel[i]); - perm_mask = build_vector (type, vec); - - if (decl_e != NULL_TREE) - { - gimple call; - - call = gimple_build_call (decl_e, 2, cur_op, mulcst); - m1 = create_tmp_reg (wider_type, NULL); - add_referenced_var (m1); - m1 = make_ssa_name (m1, call); - gimple_call_set_lhs (call, m1); - gsi_insert_seq_before (gsi, call, GSI_SAME_STMT); - - call = gimple_build_call (decl_o, 2, cur_op, mulcst); - m2 = create_tmp_reg (wider_type, NULL); - add_referenced_var (m2); - m2 = make_ssa_name (m2, call); - gimple_call_set_lhs (call, m2); - gsi_insert_seq_before (gsi, call, GSI_SAME_STMT); - } - else - { - m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR - : VEC_WIDEN_MULT_LO_EXPR, - wider_type, cur_op, mulcst); - m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? 
VEC_WIDEN_MULT_LO_EXPR - : VEC_WIDEN_MULT_HI_EXPR, - wider_type, cur_op, mulcst); - } - m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1); - m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2); - cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask); - } + cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); switch (mode) { @@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) if (compute_type == type) { compute_mode = TYPE_MODE (compute_type); - if (VECTOR_MODE_P (compute_mode) - && op != NULL - && optab_handler (op, compute_mode) != CODE_FOR_nothing) - return; - else - /* There is no operation in hardware, so fall back to scalars. */ - compute_type = TREE_TYPE (type); + if (VECTOR_MODE_P (compute_mode)) + { + if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing) + return; + if (code == MULT_HIGHPART_EXPR + && can_mult_highpart_p (compute_mode, + TYPE_UNSIGNED (compute_type))) + return; + } + /* There is no operation in hardware, so fall back to scalars. 
*/ + compute_type = TREE_TYPE (type); } gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 3f57e5dcf8b..ab9e927549d 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); optab optab; - tree dummy, q; - enum tree_code dummy_code; + tree q; int dummy_int, prec; - VEC (tree, heap) *dummy_vec; stmt_vec_info def_stmt_vinfo; if (!is_gimple_assign (last_stmt)) @@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, || prec > HOST_BITS_PER_WIDE_INT) return NULL; - optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default); - if (optab == NULL - || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) - { - tree witype = build_nonstandard_integer_type (prec * 2, - TYPE_UNSIGNED (itype)); - tree vecwtype = get_vectype_for_scalar_type (witype); - - if (vecwtype == NULL_TREE) - return NULL; - if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, - vecwtype, vectype, - &dummy, &dummy, &dummy_code, - &dummy_code, &dummy_int, - &dummy_vec)) - return NULL; - } + if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype))) + return NULL; STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 8e0965a5428..9caf1c6728a 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3304,18 +3304,17 @@ static bool vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, slp_tree slp_node) { - tree vec_dest, vec_dest2 = NULL_TREE; - tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE; + tree vec_dest; tree scalar_dest; tree op0, op1 = NULL_TREE, op2 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype, wide_vectype = 
NULL_TREE; + tree vectype; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); enum tree_code code; enum machine_mode vec_mode; tree new_temp; int op_type; - optab optab, optab2 = NULL; + optab optab; int icode; tree def; gimple def_stmt; @@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, tree vop0, vop1, vop2; bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); int vf; - unsigned char *sel = NULL; - tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE; if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; @@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, || code == RROTATE_EXPR) return false; - optab = optab_for_tree_code (code, vectype, optab_default); - /* Supportable by target? */ - if (!optab && code != MULT_HIGHPART_EXPR) + + vec_mode = TYPE_MODE (vectype); + if (code == MULT_HIGHPART_EXPR) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "no optab."); - return false; + if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype))) + icode = 0; + else + icode = CODE_FOR_nothing; } - vec_mode = TYPE_MODE (vectype); - icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing; - - if (icode == CODE_FOR_nothing - && code == MULT_HIGHPART_EXPR - && VECTOR_MODE_P (vec_mode) - && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN) - { - /* If MULT_HIGHPART_EXPR isn't supported by the backend, see - if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR - or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. 
*/ - unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest)); - unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest)); - tree wide_type - = build_nonstandard_integer_type (prec * 2, unsignedp); - wide_vectype - = get_same_sized_vectype (wide_type, vectype); - - sel = XALLOCAVEC (unsigned char, nunits_in); - if (VECTOR_MODE_P (TYPE_MODE (wide_vectype)) - && GET_MODE_SIZE (TYPE_MODE (wide_vectype)) - == GET_MODE_SIZE (vec_mode)) - { - if (targetm.vectorize.builtin_mul_widen_even - && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype)) - && targetm.vectorize.builtin_mul_widen_odd - && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype)) - && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1))) - == TYPE_MODE (wide_vectype)) - { - for (i = 0; i < nunits_in; i++) - sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) - + ((i & 1) ? nunits_in : 0); - if (can_vec_perm_p (vec_mode, false, sel)) - icode = 0; - } - if (icode == CODE_FOR_nothing) - { - decl1 = NULL_TREE; - decl2 = NULL_TREE; - optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, - vectype, optab_default); - optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, - vectype, optab_default); - if (optab != NULL - && optab2 != NULL - && optab_handler (optab, vec_mode) != CODE_FOR_nothing - && optab_handler (optab2, vec_mode) != CODE_FOR_nothing - && insn_data[optab_handler (optab, vec_mode)].operand[0].mode - == TYPE_MODE (wide_vectype) - && insn_data[optab_handler (optab2, - vec_mode)].operand[0].mode - == TYPE_MODE (wide_vectype)) - { - for (i = 0; i < nunits_in; i++) - sel[i] = !BYTES_BIG_ENDIAN + 2 * i; - if (can_vec_perm_p (vec_mode, false, sel)) - icode = optab_handler (optab, vec_mode); - } - } - } - if (icode == CODE_FOR_nothing) + else + { + optab = optab_for_tree_code (code, vectype, optab_default); + if (!optab) { - if (optab_for_tree_code (code, vectype, optab_default) == NULL) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "no optab."); - return false; - } - wide_vectype = 
NULL_TREE; - optab2 = NULL; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no optab."); + return false; } + icode = (int) optab_handler (optab, vec_mode); } if (icode == CODE_FOR_nothing) @@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, fprintf (vect_dump, "transform binary/unary operation."); /* Handle def. */ - if (wide_vectype) - { - vec_dest = vect_create_destination_var (scalar_dest, wide_vectype); - vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype); - vec_dest3 = vect_create_destination_var (scalar_dest, vectype); - vec_dest4 = vect_create_destination_var (scalar_dest, vectype); - perm_mask = vect_gen_perm_mask (vectype, sel); - } - else - vec_dest = vect_create_destination_var (scalar_dest, vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype); /* Allocate VECs for vector operands. In case of SLP, vector operands are created in the previous stages of the recursion, so no allocation is @@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); vop2 = ((op_type == ternary_op) ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); - if (wide_vectype) - { - tree new_temp2, vce; - - gcc_assert (code == MULT_HIGHPART_EXPR); - if (decl1 != NULL_TREE) - { - new_stmt = gimple_build_call (decl1, 2, vop0, vop1); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_call_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - - new_stmt = gimple_build_call (decl2, 2, vop0, vop1); - new_temp2 = make_ssa_name (vec_dest2, new_stmt); - gimple_call_set_lhs (new_stmt, new_temp2); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - } - else - { - new_temp = make_ssa_name (vec_dest, NULL); - new_stmt - = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN - ? 
VEC_WIDEN_MULT_HI_EXPR - : VEC_WIDEN_MULT_LO_EXPR, - new_temp, vop0, vop1); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - - new_temp2 = make_ssa_name (vec_dest2, NULL); - new_stmt - = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN - ? VEC_WIDEN_MULT_LO_EXPR - : VEC_WIDEN_MULT_HI_EXPR, - new_temp2, vop0, vop1); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - } - - vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp); - new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, - vec_dest3, vce, - NULL_TREE); - new_temp = make_ssa_name (vec_dest3, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - - vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2); - new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, - vec_dest4, vce, - NULL_TREE); - new_temp2 = make_ssa_name (vec_dest4, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp2); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - - new_temp = permute_vec_elements (new_temp, new_temp2, - perm_mask, stmt, gsi); - new_stmt = SSA_NAME_DEF_STMT (new_temp); - if (slp_node) - VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), - new_stmt); - continue; - } new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, vop0, vop1, vop2); new_temp = make_ssa_name (vec_dest, new_stmt); |