diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-01 11:40:39 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@linaro.org> | 2017-11-20 16:01:23 +0000 |
commit | 164804dbc17d5bf70634127b342e221cda938b6b (patch) | |
tree | 4297a7476d76757971ba2893417e6c58d97a475a | |
parent | f233e68a6d1abd5fc3a4d96f35454778d1de3b2e (diff) | |
download | gcc-164804dbc17d5bf70634127b342e221cda938b6b.tar.gz |
Replace FMA_EXPR with one internal fn per optab
There are four optabs for various forms of fused multiply-add:
fma, fms, fnma and fnms. Of these, only fma had a direct gimple
representation. For the other three we relied on special pattern-
matching during expand, although tree-ssa-math-opts.c did have
some code to try to second-guess what expand would do.
This patch removes the old FMA_EXPR representation of fma and
introduces four new internal functions, one for each optab.
IFN_FMA is tied to BUILT_IN_FMA* while the other three are
independent directly-mapped internal functions. It's then
possible to do the pattern-matching in match.pd and
tree-ssa-math-opts.c (via folding) can select the exact
FMA-based operation.
The patch removes the gimple FE support for __FMA rather than mapping
it to the internal function. There's no reason now to treat it
differently from other internal functions (although the FE doesn't
handle those yet).
The BRIG & HSA parts are a best guess, but seem relatively simple.
43 files changed, 506 insertions, 294 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 325fe21c9f9..926ceb0b34b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2767,7 +2767,7 @@ build/genmddump.o : genmddump.c $(RTL_BASE_H) $(BCONFIG_H) $(SYSTEM_H) \ $(CORETYPES_H) $(GTM_H) errors.h $(READ_MD_H) $(GENSUPPORT_H) build/genmatch.o : genmatch.c $(BCONFIG_H) $(SYSTEM_H) \ $(CORETYPES_H) errors.h $(HASH_TABLE_H) hash-map.h $(GGC_H) is-a.h \ - tree.def builtins.def internal-fn.def + tree.def builtins.def internal-fn.def case-cfn-macros.h build/gencfn-macros.o : gencfn-macros.c $(BCONFIG_H) $(SYSTEM_H) \ $(CORETYPES_H) errors.h $(HASH_TABLE_H) hash-set.h builtins.def \ internal-fn.def diff --git a/gcc/brig/brigfrontend/brig-basic-inst-handler.cc b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc index d27fe951c57..63ea41d16d9 100644 --- a/gcc/brig/brigfrontend/brig-basic-inst-handler.cc +++ b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc @@ -775,8 +775,6 @@ brig_basic_inst_handler::get_tree_code_for_hsa_opcode return CALL_EXPR; else return MAX_EXPR; - case BRIG_OPCODE_FMA: - return FMA_EXPR; case BRIG_OPCODE_ABS: return ABS_EXPR; case BRIG_OPCODE_SHL: @@ -811,6 +809,7 @@ brig_basic_inst_handler::get_tree_code_for_hsa_opcode /* Implement as 1/f (x). gcc should pattern detect that and use a native instruction, if available, for it. */ return TREE_LIST; + case BRIG_OPCODE_FMA: case BRIG_OPCODE_FLOOR: case BRIG_OPCODE_CEIL: case BRIG_OPCODE_SQRT: diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.cc b/gcc/brig/brigfrontend/brig-code-entry-handler.cc index 8fdb84c7ba8..d84e71b980e 100644 --- a/gcc/brig/brigfrontend/brig-code-entry-handler.cc +++ b/gcc/brig/brigfrontend/brig-code-entry-handler.cc @@ -763,6 +763,7 @@ brig_code_entry_handler::get_builtin_for_hsa_opcode case BRIG_OPCODE_NEXP2: builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2); break; + case BRIG_OPCODE_FMA: case BRIG_OPCODE_NFMA: builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA); break; diff --git a/gcc/builtins.c b/gcc/builtins.c index 41e397240dd..b3cad6fe4b5 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -8180,21 +8180,6 @@ fold_builtin_abs (location_t loc, tree arg, tree type) return fold_build1_loc (loc, ABS_EXPR, type, arg); } -/* Fold a call to fma, fmaf, or fmal with arguments ARG[012]. */ - -static tree -fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type) -{ - /* ??? Only expand to FMA_EXPR if it's directly supported. */ - if (validate_arg (arg0, REAL_TYPE) - && validate_arg (arg1, REAL_TYPE) - && validate_arg (arg2, REAL_TYPE) - && optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) - return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2); - - return NULL_TREE; -} - /* Fold a call to builtin carg(a+bi) -> atan2(b,a). */ static tree @@ -9094,10 +9079,6 @@ fold_builtin_3 (location_t loc, tree fndecl, CASE_FLT_FN (BUILT_IN_SINCOS): return fold_builtin_sincos (loc, arg0, arg1, arg2); - CASE_FLT_FN (BUILT_IN_FMA): - CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMA): - return fold_builtin_fma (loc, arg0, arg1, arg2, type); - CASE_FLT_FN (BUILT_IN_REMQUO): if (validate_arg (arg0, REAL_TYPE) && validate_arg (arg1, REAL_TYPE) diff --git a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c index 8f624e3b9a2..aea675ffabb 100644 --- a/gcc/c/gimple-parser.c +++ b/gcc/c/gimple-parser.c @@ -903,27 +903,6 @@ c_parser_gimple_postfix_expression (c_parser *parser) expr.value = fold_convert (type, val); return expr; } - else if (strcmp (IDENTIFIER_POINTER (id), "__FMA") == 0) - { - c_parser_consume_token (parser); - auto_vec<tree> args; - - if (c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) - { - c_parser_gimple_expr_list (parser, &args); - c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, - "expected %<)%>"); - } - if (args.length () != 3) - { - error_at (loc, "invalid number of operands to __FMA"); - expr.value = error_mark_node; - return expr; - } - expr.value = build3_loc (loc, FMA_EXPR, TREE_TYPE (args[0]), - args[0], args[1], args[2]); - return expr; - } /* SSA name. */ unsigned version, ver_offset; diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 2563985d484..cc2212969af 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -4179,7 +4179,6 @@ expand_debug_expr (tree exp) case SAD_EXPR: case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: - case FMA_EXPR: goto ternary; case TRUTH_ANDIF_EXPR: @@ -5167,9 +5166,6 @@ expand_debug_expr (tree exp) } return NULL; - case FMA_EXPR: - return simplify_gen_ternary (FMA, mode, inner_mode, op0, op1, op2); - default: flag_unsupported: if (flag_checking) diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 3862fa284ae..bf8ee003419 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -4433,7 +4433,6 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, non_constant_p, overflow_p); break; - case FMA_EXPR: case VEC_PERM_EXPR: r = cxx_eval_trinary_expression (ctx, t, lval, non_constant_p, overflow_p); @@ -5811,7 +5810,6 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, bool now, case ARRAY_NOTATION_REF: return false; - case FMA_EXPR: case VEC_PERM_EXPR: for (i = 0; i < 3; ++i) if (!RECUR (TREE_OPERAND (t, i), true)) diff --git a/gcc/expr.c b/gcc/expr.c index 0fc4ad5ce61..799c9e393cc 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8774,67 +8774,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL); return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp)); - case FMA_EXPR: - { - optab opt = fma_optab; - gimple *def0, *def2; - - /* If there is no insn for FMA, emit it as __builtin_fma{,f,l} - call. */ - if (optab_handler (fma_optab, mode) == CODE_FOR_nothing) - { - tree fn = mathfn_built_in (TREE_TYPE (treeop0), BUILT_IN_FMA); - tree call_expr; - - gcc_assert (fn != NULL_TREE); - call_expr = build_call_expr (fn, 3, treeop0, treeop1, treeop2); - return expand_builtin (call_expr, target, subtarget, mode, false); - } - - def0 = get_def_for_expr (treeop0, NEGATE_EXPR); - /* The multiplication is commutative - look at its 2nd operand - if the first isn't fed by a negate. */ - if (!def0) - { - def0 = get_def_for_expr (treeop1, NEGATE_EXPR); - /* Swap operands if the 2nd operand is fed by a negate. */ - if (def0) - std::swap (treeop0, treeop1); - } - def2 = get_def_for_expr (treeop2, NEGATE_EXPR); - - op0 = op2 = NULL; - - if (def0 && def2 - && optab_handler (fnms_optab, mode) != CODE_FOR_nothing) - { - opt = fnms_optab; - op0 = expand_normal (gimple_assign_rhs1 (def0)); - op2 = expand_normal (gimple_assign_rhs1 (def2)); - } - else if (def0 - && optab_handler (fnma_optab, mode) != CODE_FOR_nothing) - { - opt = fnma_optab; - op0 = expand_normal (gimple_assign_rhs1 (def0)); - } - else if (def2 - && optab_handler (fms_optab, mode) != CODE_FOR_nothing) - { - opt = fms_optab; - op2 = expand_normal (gimple_assign_rhs1 (def2)); - } - - if (op0 == NULL) - op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL); - if (op2 == NULL) - op2 = expand_normal (treeop2); - op1 = expand_normal (treeop1); - - return expand_ternary_op (TYPE_MODE (type), opt, - op0, op1, op2, target, 0); - } - case MULT_EXPR: /* If this is a fixed-point operation, then we cannot use the code below because "expand_mult" doesn't support sat/no-sat fixed-point diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c index f23b1bf28bd..5d88a356d3b 100644 --- a/gcc/fold-const-call.c +++ b/gcc/fold-const-call.c @@ -1480,6 +1480,26 @@ fold_const_call_ssss (real_value *result, combined_fn fn, CASE_CFN_FMA_FN: return do_mpfr_arg3 (result, mpfr_fma, arg0, arg1, arg2, format); + case CFN_FMS: + { + real_value new_arg2 = real_value_negate (arg2); + return do_mpfr_arg3 (result, mpfr_fma, arg0, arg1, &new_arg2, format); + } + + case CFN_FNMA: + { + real_value new_arg0 = real_value_negate (arg0); + return do_mpfr_arg3 (result, mpfr_fma, &new_arg0, arg1, arg2, format); + } + + case CFN_FNMS: + { + real_value new_arg0 = real_value_negate (arg0); + real_value new_arg2 = real_value_negate (arg2); + return do_mpfr_arg3 (result, mpfr_fma, &new_arg0, arg1, + &new_arg2, format); + } + default: return false; } @@ -1593,20 +1613,3 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) return fold_const_call_1 (fn, type, arg0, arg1, arg2); } } - -/* Fold a fma operation with arguments ARG[012]. */ - -tree -fold_fma (location_t, tree type, tree arg0, tree arg1, tree arg2) -{ - REAL_VALUE_TYPE result; - if (real_cst_p (arg0) - && real_cst_p (arg1) - && real_cst_p (arg2) - && do_mpfr_arg3 (&result, mpfr_fma, TREE_REAL_CST_PTR (arg0), - TREE_REAL_CST_PTR (arg1), TREE_REAL_CST_PTR (arg2), - REAL_MODE_FORMAT (TYPE_MODE (type)))) - return build_real (type, result); - - return NULL_TREE; -} diff --git a/gcc/fold-const-call.h b/gcc/fold-const-call.h index 7ca8a3ce6b5..eb4b7acd5a1 100644 --- a/gcc/fold-const-call.h +++ b/gcc/fold-const-call.h @@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see tree fold_const_call (combined_fn, tree, tree); tree fold_const_call (combined_fn, tree, tree, tree); tree fold_const_call (combined_fn, tree, tree, tree, tree); -tree fold_fma (location_t, tree, tree, tree, tree); tree build_cmp_result (tree type, int res); #endif diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 912411f820f..f232b5a6d2a 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3381,7 +3381,6 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) case TRUTH_ORIF_EXPR: return OP_SAME (0) && OP_SAME (1); - case FMA_EXPR: case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: if (!OP_SAME (2)) @@ -11767,17 +11766,6 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, return NULL_TREE; - case FMA_EXPR: - /* For integers we can decompose the FMA if possible. */ - if (TREE_CODE (arg0) == INTEGER_CST - && TREE_CODE (arg1) == INTEGER_CST) - return fold_build2_loc (loc, PLUS_EXPR, type, - const_binop (MULT_EXPR, arg0, arg1), arg2); - if (integer_zerop (arg2)) - return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1); - - return fold_fma (loc, type, arg0, arg1, arg2); - case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { diff --git a/gcc/genmatch.c b/gcc/genmatch.c index 9da911a3695..06f94ee0dc1 100644 --- a/gcc/genmatch.c +++ b/gcc/genmatch.c @@ -241,6 +241,20 @@ enum internal_fn { IFN_LAST }; +enum combined_fn { +#define DEF_BUILTIN(ENUM, N, C, T, LT, B, F, NA, AT, IM, COND) \ + CFN_##ENUM = int (ENUM), +#include "builtins.def" + +#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ + CFN_##CODE = int (END_BUILTINS) + int (IFN_##CODE), +#include "internal-fn.def" + + CFN_LAST +}; + +#include "case-cfn-macros.h" + /* Return true if CODE represents a commutative tree code. Otherwise return false. */ bool @@ -288,7 +302,6 @@ commutative_ternary_tree_code (enum tree_code code) case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: case DOT_PROD_EXPR: - case FMA_EXPR: return true; default: @@ -450,6 +463,34 @@ is_a_helper <user_id *>::test (id_base *id) return id->kind == id_base::USER; } +/* If ID has a pair of consecutive, commutative operands, return the + index of the first, otherwise return -1. */ + +static int +commutative_op (id_base *id) +{ + if (operator_id *code = dyn_cast <operator_id *> (id)) + { + if (commutative_tree_code (code->code) + || commutative_ternary_tree_code (code->code)) + return 0; + return -1; + } + if (fn_id *fn = dyn_cast <fn_id *> (id)) + switch (fn->fn) + { + CASE_CFN_FMA: + case CFN_FMS: + case CFN_FNMA: + case CFN_FNMS: + return 0; + + default: + return -1; + } + return -1; +} + /* Add a predicate identifier to the hash. */ static predicate_id * @@ -994,9 +1035,10 @@ commutate (operand *op, vec<vec<user_id *> > &for_vec) } } ne->is_commutative = false; - // result[i].length () is 2 since e->operation is binary - for (unsigned j = result[i].length (); j; --j) - ne->append_op (result[i][j-1]); + ne->append_op (result[i][1]); + ne->append_op (result[i][0]); + for (unsigned j = 2; j < result[i].length (); ++j) + ne->append_op (result[i][j]); ret.safe_push (ne); } @@ -2755,24 +2797,18 @@ dt_operand::gen_gimple_expr (FILE *f, int indent) /* While the toplevel operands are canonicalized by the caller after valueizing operands of sub-expressions we have to re-canonicalize operand order. */ - if (operator_id *code = dyn_cast <operator_id *> (id)) + int opno = commutative_op (id); + if (opno >= 0) { - /* ??? We can't canonicalize tcc_comparison operands here - because that requires changing the comparison code which - we already matched... */ - if (commutative_tree_code (code->code) - || commutative_ternary_tree_code (code->code)) - { - char child_opname0[20], child_opname1[20]; - gen_opname (child_opname0, 0); - gen_opname (child_opname1, 1); - fprintf_indent (f, indent, - "if (tree_swap_operands_p (%s, %s))\n", - child_opname0, child_opname1); - fprintf_indent (f, indent, - " std::swap (%s, %s);\n", - child_opname0, child_opname1); - } + char child_opname0[20], child_opname1[20]; + gen_opname (child_opname0, opno); + gen_opname (child_opname1, opno + 1); + fprintf_indent (f, indent, + "if (tree_swap_operands_p (%s, %s))\n", + child_opname0, child_opname1); + fprintf_indent (f, indent, + " std::swap (%s, %s);\n", + child_opname0, child_opname1); } return n_braces; @@ -4213,11 +4249,11 @@ parser::parse_expr () e->operation->id, e->operation->nargs, e->ops.length ()); if (is_commutative) { - if (e->ops.length () == 2) + if (e->ops.length () >= 2) e->is_commutative = true; else - fatal_at (token, "only binary operators or function with " - "two arguments can be marked commutative"); + fatal_at (token, "only operators or functions with " + "at least two arguments can be marked commutative"); } e->expr_type = expr_type; return op; diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 6702366b394..52c4be5f741 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -490,27 +490,6 @@ dump_ternary_rhs (pretty_printer *buffer, gassign *gs, int spc, pp_greater (buffer); break; - case FMA_EXPR: - if (flags & TDF_GIMPLE) - { - pp_string (buffer, "__FMA ("); - dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false); - pp_comma (buffer); - dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false); - pp_comma (buffer); - dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false); - pp_right_paren (buffer); - } - else - { - dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false); - pp_string (buffer, " * "); - dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false); - pp_string (buffer, " + "); - dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false); - } - break; - case DOT_PROD_EXPR: pp_string (buffer, "DOT_PROD_EXPR <"); dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false); diff --git a/gcc/gimple-ssa-backprop.c b/gcc/gimple-ssa-backprop.c index 1daa0ceef0a..16363003115 100644 --- a/gcc/gimple-ssa-backprop.c +++ b/gcc/gimple-ssa-backprop.c @@ -375,6 +375,9 @@ backprop::process_builtin_call_use (gcall *call, tree rhs, usage_info *info) CASE_CFN_FMA: CASE_CFN_FMA_FN: + case CFN_FMS: + case CFN_FNMA: + case CFN_FNMS: /* In X * X + Y, where Y is distinct from X, the sign of X doesn't matter. */ if (gimple_call_arg (call, 0) == rhs @@ -420,15 +423,6 @@ backprop::process_assign_use (gassign *assign, tree rhs, usage_info *info) } break; - case FMA_EXPR: - /* In X * X + Y, where Y is distinct from X, the sign of X doesn't - matter. */ - if (gimple_assign_rhs1 (assign) == rhs - && gimple_assign_rhs2 (assign) == rhs - && gimple_assign_rhs3 (assign) != rhs) - info->flags.ignore_sign = true; - break; - case MULT_EXPR: /* In X * X, the sign of X doesn't matter. */ if (gimple_assign_rhs1 (assign) == rhs diff --git a/gcc/gimple.c b/gcc/gimple.c index c986a732004..58499bae7f7 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2095,8 +2095,7 @@ get_gimple_rhs_num_ops (enum tree_code code) || (SYM) == REALIGN_LOAD_EXPR \ || (SYM) == VEC_COND_EXPR \ || (SYM) == VEC_PERM_EXPR \ - || (SYM) == BIT_INSERT_EXPR \ - || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \ + || (SYM) == BIT_INSERT_EXPR) ? GIMPLE_TERNARY_RHS \ : ((SYM) == CONSTRUCTOR \ || (SYM) == OBJ_TYPE_REF \ || (SYM) == ASSERT_EXPR \ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 55b51b0c648..f312fb5e261 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -11978,7 +11978,6 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, } break; - case FMA_EXPR: case VEC_PERM_EXPR: /* Classified as tcc_expression. */ goto expr_3; diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 8e613142399..a462ae5aa11 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -3174,23 +3174,6 @@ gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb) case NEGATE_EXPR: opcode = BRIG_OPCODE_NEG; break; - case FMA_EXPR: - /* There is a native HSA instruction for scalar FMAs but not for vector - ones. */ - if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE) - { - hsa_op_reg *dest - = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign)); - hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); - hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb); - hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, op1, op2, hbb); - gen_hsa_binary_operation (BRIG_OPCODE_ADD, dest, tmp, op3, hbb); - return; - } - opcode = BRIG_OPCODE_MAD; - break; case MIN_EXPR: opcode = BRIG_OPCODE_MIN; break; @@ -4486,6 +4469,57 @@ gen_hsa_divmod (gcall *call, hsa_bb *hbb) insn->set_output_in_type (dest, 0, hbb); } +/* Emit instructions that implement FMA, FMS, FNMA or FNMS call STMT. + Instructions are appended to basic block HBB. NEGATE1 is true for + FNMA and FNMS. NEGATE3 is true for FMS and FNMS. */ + +static void +gen_hsa_fma (gcall *call, hsa_bb *hbb, bool negate1, bool negate3) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + tree rhs1 = gimple_call_arg (call, 0); + tree rhs2 = gimple_call_arg (call, 1); + tree rhs3 = gimple_call_arg (call, 2); + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); + hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb); + + if (negate1) + { + hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); + gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op1, hbb); + op1 = tmp; + } + + /* There is a native HSA instruction for scalar FMAs but not for vector + ones. */ + if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE) + { + hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); + gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, op1, op2, hbb); + gen_hsa_binary_operation (negate3 ? BRIG_OPCODE_SUB : BRIG_OPCODE_ADD, + dest, tmp, op3, hbb); + } + else + { + if (negate3) + { + hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); + gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op3, hbb); + op3 = tmp; + } + hsa_insn_basic *insn = new hsa_insn_basic (4, BRIG_OPCODE_MAD, + dest->m_type, dest, + op1, op2, op3); + hbb->append_insn (insn); + } +} + /* Set VALUE to a shadow kernel debug argument and append a new instruction to HBB basic block. */ @@ -5220,6 +5254,22 @@ gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb) gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); break; + case IFN_FMA: + gen_hsa_fma (stmt, hbb, false, false); + break; + + case IFN_FMS: + gen_hsa_fma (stmt, hbb, false, true); + break; + + case IFN_FNMA: + gen_hsa_fma (stmt, hbb, true, false); + break; + + case IFN_FNMS: + gen_hsa_fma (stmt, hbb, true, true); + break; + default: HSA_SORRY_ATV (gimple_location (stmt), "support for HSA does not implement internal function: %s", diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 95e14429e24..d0e5919a760 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -90,6 +90,7 @@ init_internal_fns () #define scatter_store_direct { 3, 1, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } +#define ternary_direct { 0, 0, true } #define cond_unary_direct { 1, 1, true } #define cond_binary_direct { 1, 1, true } #define while_direct { 0, 2, false } @@ -2954,6 +2955,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_binary_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 2) +#define expand_ternary_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 3) + #define expand_cond_unary_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 2) @@ -3036,6 +3040,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_unary_optab_supported_p direct_optab_supported_p #define direct_binary_optab_supported_p direct_optab_supported_p +#define direct_ternary_optab_supported_p direct_optab_supported_p #define direct_cond_unary_optab_supported_p direct_optab_supported_p #define direct_cond_binary_optab_supported_p direct_optab_supported_p #define direct_mask_load_optab_supported_p direct_optab_supported_p diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 2db417b053b..d1f8818bb00 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see - unary: a normal unary optab, such as vec_reverse_<mode> - binary: a normal binary optab, such as vec_interleave_lo_<mode> + - ternary: a normal ternary optab, such as fma<mode>4 - cond_binary: a conditional binary optab, such as add<mode>cc @@ -147,6 +148,10 @@ DEF_INTERNAL_OPTAB_FN (VEC_REVERSE, ECF_CONST | ECF_NOTHROW, DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, vec_shl_insert, binary) +DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) +DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) +DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary) + DEF_INTERNAL_COND_OPTAB_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_OPTAB_FN (SUB, ECF_CONST, sub, binary) DEF_INTERNAL_COND_OPTAB_FN (SMIN, ECF_CONST, smin, binary) @@ -219,6 +224,9 @@ DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary) /* FP scales. */ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) +/* Ternary math functions. */ +DEF_INTERNAL_FLT_FN (FMA, ECF_CONST, fma, ternary) + /* Unary integer ops. */ DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary) DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary) diff --git a/gcc/match.pd b/gcc/match.pd index 71f2bd46c4f..f27073994b6 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4479,3 +4479,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || wi::geu_p (wi::to_wide (@rpos), wi::to_wide (@ipos) + isize)) (BIT_FIELD_REF @0 @rsize @rpos))))) + +(for fmas (FMA) + (simplify + (fmas:c (negate @0) @1 @2) + (IFN_FNMA @0 @1 @2)) + (simplify + (fmas @0 @1 (negate @2)) + (IFN_FMS @0 @1 @2)) + (simplify + (fmas:c (negate @0) @1 (negate @2)) + (IFN_FNMS @0 @1 @2)) + (simplify + (negate (fmas @0 @1 @2)) + (IFN_FNMS @0 @1 @2))) + +(simplify + (IFN_FMS:c (negate @0) @1 @2) + (IFN_FNMS @0 @1 @2)) +(simplify + (IFN_FMS @0 @1 (negate @2)) + (IFN_FMA @0 @1 @2)) +(simplify + (IFN_FMS:c (negate @0) @1 (negate @2)) + (IFN_FNMA @0 @1 @2)) +(simplify + (negate (IFN_FMS @0 @1 @2)) + (IFN_FNMA @0 @1 @2)) + +(simplify + (IFN_FNMA:c (negate @0) @1 @2) + (IFN_FMA @0 @1 @2)) +(simplify + (IFN_FNMA @0 @1 (negate @2)) + (IFN_FNMS @0 @1 @2)) +(simplify + (IFN_FNMA:c (negate @0) @1 (negate @2)) + (IFN_FMS @0 @1 @2)) +(simplify + (negate (IFN_FNMA @0 @1 @2)) + (IFN_FMS @0 @1 @2)) + +(simplify + (IFN_FNMS:c (negate @0) @1 @2) + (IFN_FMS @0 @1 @2)) +(simplify + (IFN_FNMS @0 @1 (negate @2)) + (IFN_FNMA @0 @1 @2)) +(simplify + (IFN_FNMS:c (negate @0) @1 (negate @2)) + (IFN_FMA @0 @1 @2)) +(simplify + (negate (IFN_FNMS @0 @1 @2)) + (IFN_FMA @0 @1 @2)) diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index beb7d26e002..6c49288c687 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -143,9 +143,6 @@ optab_for_tree_code (enum tree_code code, const_tree type, : (TYPE_SATURATING (type) ? ssmsub_widen_optab : smsub_widen_optab)); - case FMA_EXPR: - return fma_optab; - case REDUC_MAX_EXPR: return TYPE_UNSIGNED (type) ? reduc_umax_scal_optab : reduc_smax_scal_optab; diff --git a/gcc/testsuite/gcc.dg/fma-1.c b/gcc/testsuite/gcc.dg/fma-1.c new file mode 100644 index 00000000000..f9865775ac4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-1.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return a * b + c; +} + +double +f2 (double a, double b, double c) +{ + return a * b + c; +} + +/* { dg-final { scan-tree-dump-times { = FMA \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-2.c b/gcc/testsuite/gcc.dg/fma-2.c new file mode 100644 index 00000000000..79e873ad9c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-2.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return a * b - c; +} + +double +f2 (double a, double b, double c) +{ + return a * b - c; +} + +/* { dg-final { scan-tree-dump-times { = FMS \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c new file mode 100644 index 00000000000..931cdd49a26 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-3.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return c - a * b; +} + +double +f2 (double a, double b, double c) +{ + return c - a * b; +} + +/* { dg-final { scan-tree-dump-times { = FNMA \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c new file mode 100644 index 00000000000..d80d04acc99 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-4.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2 -fdump-tree-widening_mul" } */ + +float +f1 (float a, float b, float c) +{ + return -(a * b) - c; +} + +double +f2 (double a, double b, double c) +{ + return -(a * b) - c; +} + +/* { dg-final { scan-tree-dump-times { = FNMS \(} 2 "widening_mul" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-5.c b/gcc/testsuite/gcc.dg/fma-5.c new file mode 100644 index 00000000000..b8f6deeeb49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-5.c @@ -0,0 +1,53 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return __builtin_fmaf (a, b, -c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (a, b, -c); +} + +void +f3 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (a, b, -e); + res[1] = __builtin_fmaf (c, d, -e); +} + +void +f4 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (a, b, -e); + res[1] = __builtin_fma (c, d, -e); +} + +float +f5 (float a, float b, float c) +{ + return -__builtin_fmaf (-a, b, c); +} + +double +f6 (double a, double b, double c) +{ + return -__builtin_fma (-a, b, c); +} + +float +f7 (float a, float b, float c) +{ + return -__builtin_fmaf (a, -b, c); +} + +double +f8 (double a, double b, double c) +{ + return -__builtin_fma (a, -b, c); +} + +/* { dg-final { scan-tree-dump-times { = FMS \(} 10 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c new file mode 100644 index 00000000000..06845725783 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-6.c @@ -0,0 +1,67 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return __builtin_fmaf (-a, b, c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (-a, b, c); +} + +float +f3 (float a, float b, float c) +{ + return __builtin_fmaf (a, -b, c); +} + +double +f4 (double a, double b, double c) +{ + return __builtin_fma (a, -b, c); +} + +void +f5 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (-a, b, c); + res[1] = __builtin_fmaf (-a, d, e); +} + +void +f6 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (-a, b, c); + res[1] = __builtin_fma (-a, d, e); +} + +void +f7 (float a, float b, float c, float d, float e, float *res) +{ + res[0] = __builtin_fmaf (a, -b, c); + res[1] = __builtin_fmaf (d, -b, e); +} + +void +f8 (double a, double b, double c, double d, double e, double *res) +{ + res[0] = __builtin_fma (a, -b, c); + res[1] = __builtin_fma (d, -b, e); +} + +float +f9 (float a, float b, float c) +{ + return -__builtin_fmaf (a, b, -c); +} + +double +f10 (double a, double b, double c) +{ + return -__builtin_fma (a, b, -c); +} + +/* { dg-final { scan-tree-dump-times { = FNMA \(} 14 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c new file mode 100644 index 00000000000..97b1bbd9f63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fma-7.c @@ -0,0 +1,67 @@ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +float +f1 (float a, float b, float c) +{ + return __builtin_fmaf (-a, b, -c); +} + +double +f2 (double a, double b, double c) +{ + return __builtin_fma (-a, b, -c); +} + +float +f3 (float a, float b, float c) +{ + return __builtin_fmaf (a, -b, -c); +} + +double +f4 (double a, double b, double c) +{ + return __builtin_fma (a, -b, -c); +} + +void +f5 (float a, float b, float c, float d, float *res) +{ + res[0] = __builtin_fmaf (-a, b, -c); + res[1] = __builtin_fmaf (-a, d, -c); +} + +void +f6 (double a, double b, double c, double d, double *res) +{ + res[0] = __builtin_fma (-a, b, -c); + res[1] = __builtin_fma (-a, d, -c); +} + +void +f7 (float a, float b, float c, float d, float *res) +{ + res[0] = __builtin_fmaf (a, -b, -c); + res[1] = __builtin_fmaf (d, -b, -c); +} + +void +f8 (double a, double b, double c, double d, double *res) +{ + res[0] = __builtin_fma (a, -b, -c); + res[1] = __builtin_fma (d, -b, -c); +} + +float +f9 (float a, float b, float c) +{ + return -__builtin_fmaf (a, b, c); +} + +double +f10 (double a, double b, double c) +{ + return -__builtin_fma (a, b, c); +} + +/* { dg-final { scan-tree-dump-times { = FNMS \(} 14 "optimized" { target all_scalar_fma } } } */ diff --git a/gcc/testsuite/gcc.dg/gimplefe-26.c b/gcc/testsuite/gcc.dg/gimplefe-26.c deleted file mode 100644 index bc2f3b1d4ca..00000000000 --- a/gcc/testsuite/gcc.dg/gimplefe-26.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do compile { target c99_runtime } } */ -/* { dg-options "-O -fgimple -fdump-tree-ssa-gimple" } */ - -#define foo(type, num) \ -type __GIMPLE () foo_##num (type a, type b, type c) \ -{ \ - type t0; \ - t0_1 = __FMA (a, b, c); \ - return t0_1; \ -} - -foo(float, 1) -foo(double, 2) -foo(long double, 3) - -/* { dg-final { scan-tree-dump-times "__FMA" 3 "ssa" } } */ diff --git a/gcc/testsuite/gfortran.dg/reassoc_10.f b/gcc/testsuite/gfortran.dg/reassoc_10.f index 3720d8f414e..4a0a6c23f03 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_10.f +++ b/gcc/testsuite/gfortran.dg/reassoc_10.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Q,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_7.f b/gcc/testsuite/gfortran.dg/reassoc_7.f index 04d2e678fa6..fce93a8a8d0 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_7.f +++ b/gcc/testsuite/gfortran.dg/reassoc_7.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_8.f b/gcc/testsuite/gfortran.dg/reassoc_8.f index a8aaa6008e8..3a169010cdd 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_8.f +++ b/gcc/testsuite/gfortran.dg/reassoc_8.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/gfortran.dg/reassoc_9.f b/gcc/testsuite/gfortran.dg/reassoc_9.f index 5d9d15fa735..5ed53db1fda 100644 --- a/gcc/testsuite/gfortran.dg/reassoc_9.f +++ b/gcc/testsuite/gfortran.dg/reassoc_9.f @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-O3 -ffast-math -fdump-tree-optimized" } +! { dg-options "-O3 -ffast-math -ffp-contract=off -fdump-tree-optimized" } SUBROUTINE S55199(P,Dvdph) implicit none diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c6e9b70a2ab..fc656b96b6a 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2889,6 +2889,13 @@ proc check_effective_target_base_quadfloat_support { } { return 1 } +# Return 1 if the target supports all four forms of fused multiply-add +# (fma, fms, fnma, and fnms) for both float and double. + +proc check_effective_target_all_scalar_fma { } { + return [istarget aarch64*-*-*] +} + # Return 1 if the target supports compiling fixed-point, # 0 otherwise. diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index bd34a13589e..d75ea80c956 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4217,20 +4217,6 @@ verify_gimple_assign_ternary (gassign *stmt) } break; - case FMA_EXPR: - if (!useless_type_conversion_p (lhs_type, rhs1_type) - || !useless_type_conversion_p (lhs_type, rhs2_type) - || !useless_type_conversion_p (lhs_type, rhs3_type)) - { - error ("type mismatch in fused multiply-add expression"); - debug_generic_expr (lhs_type); - debug_generic_expr (rhs1_type); - debug_generic_expr (rhs2_type); - debug_generic_expr (rhs3_type); - return true; - } - break; - case VEC_COND_EXPR: if (!VECTOR_BOOLEAN_TYPE_P (rhs1_type) || may_ne (TYPE_VECTOR_SUBPARTS (rhs1_type), diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c index 855b5a4ee58..21b2fa9c959 100644 --- a/gcc/tree-eh.c +++ b/gcc/tree-eh.c @@ -2511,8 +2511,7 @@ operation_could_trap_p (enum tree_code op, bool fp_operation, bool honor_trapv, if (TREE_CODE_CLASS (op) != tcc_comparison && TREE_CODE_CLASS (op) != tcc_unary - && TREE_CODE_CLASS (op) != tcc_binary - && op != FMA_EXPR) + && TREE_CODE_CLASS (op) != tcc_binary) return false; return operation_could_trap_helper_p (op, fp_operation, honor_trapv, @@ -2738,8 +2737,7 @@ stmt_could_throw_1_p (gassign *stmt) if (TREE_CODE_CLASS (code) == tcc_comparison || TREE_CODE_CLASS (code) == tcc_unary - || TREE_CODE_CLASS (code) == tcc_binary - || code == FMA_EXPR) + || TREE_CODE_CLASS (code) == tcc_binary) { if (TREE_CODE_CLASS (code) == tcc_comparison) t = TREE_TYPE (gimple_assign_rhs1 (stmt)); diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 6a32425801e..1cc6a87343d 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3821,7 +3821,6 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case MINUS_EXPR: case MULT_EXPR: case MULT_HIGHPART_EXPR: - case FMA_EXPR: case ADDR_SPACE_CONVERT_EXPR: case FIXED_CONVERT_EXPR: diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index bf06eb35bb8..00a94c5f15c 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -2919,16 +2919,6 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, pp_string (pp, " > "); break; - case FMA_EXPR: - pp_string (pp, " FMA_EXPR < "); - dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); - pp_string (pp, ", "); - dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false); - pp_string (pp, ", "); - dump_generic_node (pp, TREE_OPERAND (node, 2), spc, flags, false); - pp_string (pp, " > "); - break; - case OACC_PARALLEL: pp_string (pp, "#pragma acc parallel"); goto dump_omp_clauses_body; @@ -3596,7 +3586,6 @@ op_code_prio (enum tree_code code) case CEIL_MOD_EXPR: case FLOOR_MOD_EXPR: case ROUND_MOD_EXPR: - case FMA_EXPR: return 13; case TRUTH_NOT_EXPR: diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 223deef60bd..59393d190ef 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -493,7 +493,6 @@ stmt_cost (gimple *stmt) case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: case DOT_PROD_EXPR: - case FMA_EXPR: case TRUNC_DIV_EXPR: case CEIL_DIV_EXPR: case FLOOR_DIV_EXPR: diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 62b4a09ce59..ca2e6e2e098 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3552,6 +3552,14 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, return true; } +/* gimple_fold callback that "valueizes" everything. */ + +static tree +aggressive_valueize (tree val) +{ + return val; +} + /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 with uses in additions and subtractions to form fused multiply-add operations. Returns true if successful and MUL_STMT should be removed. */ @@ -3562,7 +3570,7 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) tree mul_result = gimple_get_lhs (mul_stmt); tree type = TREE_TYPE (mul_result); gimple *use_stmt, *neguse_stmt; - gassign *fma_stmt; + gcall *fma_stmt; use_operand_p use_p; imm_use_iterator imm_iter; @@ -3577,7 +3585,8 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) /* If the target doesn't support it, don't generate it. We assume that if fma isn't available then fms, fnma or fnms are not either. */ - if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) + optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt)); + if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type)) return false; /* If the multiplication has zero uses, it is kept around probably because @@ -3668,8 +3677,8 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) that a mult / subtract pair. */ if (use_code == MINUS_EXPR && !negate_p && gimple_assign_rhs1 (use_stmt) == result - && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing - && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) + && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type) + && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)) { tree rhs2 = gimple_assign_rhs2 (use_stmt); @@ -3703,6 +3712,7 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) enum tree_code use_code; tree addop, mulop1 = op1, result = mul_result; bool negate_p = false; + gimple_seq seq = NULL; if (is_gimple_debug (use_stmt)) continue; @@ -3726,11 +3736,7 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) addop = gimple_assign_rhs2 (use_stmt); /* a * b - c -> a * b + (-c) */ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) - addop = force_gimple_operand_gsi (&gsi, - build1 (NEGATE_EXPR, - type, addop), - true, NULL_TREE, true, - GSI_SAME_STMT); + addop = gimple_build (&seq, NEGATE_EXPR, type, addop); } else { @@ -3741,15 +3747,18 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) } if (negate_p) - mulop1 = force_gimple_operand_gsi (&gsi, - build1 (NEGATE_EXPR, - type, mulop1), - true, NULL_TREE, true, - GSI_SAME_STMT); - - fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), - FMA_EXPR, mulop1, op2, addop); + mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1); + + if (seq) + gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); + gimple_call_set_lhs (fma_stmt, gimple_assign_lhs (use_stmt)); + gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (use_stmt)); gsi_replace (&gsi, fma_stmt, true); + /* Valueize aggressively so that we generate FMS, FNMA and FNMS + regardless of where the negation occurs. */ + if (fold_stmt (&gsi, aggressive_valueize)) + update_stmt (gsi_stmt (gsi)); widen_mul_stats.fmas_inserted++; } diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c index ed80442031e..15f4603176d 100644 --- a/gcc/tree-ssa-operands.c +++ b/gcc/tree-ssa-operands.c @@ -849,7 +849,6 @@ get_expr_operands (struct function *fn, gimple *stmt, tree *expr_p, int flags) case REALIGN_LOAD_EXPR: case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: - case FMA_EXPR: { get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 0), flags); get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 1), flags); diff --git a/gcc/tree.c b/gcc/tree.c index ba3927b5390..7e1ccfd9cbe 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -7311,7 +7311,6 @@ commutative_ternary_tree_code (enum tree_code code) case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: case DOT_PROD_EXPR: - case FMA_EXPR: return true; default: @@ -7602,7 +7601,6 @@ add_expr (const_tree t, inchash::hash &hstate, unsigned int flags) flags &= ~OEP_ADDRESS_OF; break; - case FMA_EXPR: case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: { diff --git a/gcc/tree.def b/gcc/tree.def index ae64313d192..a87e3fd6102 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1372,12 +1372,6 @@ DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3) by the second argument. */ DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2) -/* Fused multiply-add. - All operands and the result are of the same type. No intermediate - rounding is performed after multiplying operand one with operand two - before adding operand three. */ -DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3) - /* Widening vector multiplication. The two operands are vectors with N elements of size S. Multiplying the elements of the two vectors will result in N products of size 2*S. |