summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>2010-11-04 10:56:22 +0000
committerrguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>2010-11-04 10:56:22 +0000
commitb9be572eda05cd23bbabd6409387d2c7ac782715 (patch)
tree5b089e25b51a9c3914b5055c84f8d83414072dab
parent2fee2038f99cb244aa1250f8ad42cae7fe2702ec (diff)
downloadgcc-b9be572eda05cd23bbabd6409387d2c7ac782715.tar.gz
2010-11-04 Richard Guenther <rguenther@suse.de>
Richard Henderson <rth@redhat.com> * tree.def (FMA_EXPR): New tree code. * expr.c (expand_expr_real_2): Add FMA_EXPR expansion code. * gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS. * tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types. * tree-inline.c (estimate_operator_cost): Handle FMA_EXPR. * gimple-pretty-print.c (dump_ternary_rhs): Likewise. * tree-ssa-math-opts.c (convert_mult_to_fma): New function. (execute_optimize_widening_mul): Call it. Reorganize to allow dead stmt removal. Move TODO flags ... (pass_optimize_widening_mul): ... here. * flag-types.h (enum fp_contract_mode): New enum. * common.opt (flag_fp_contract_mode): New variable. (-ffp-contract): New option. * opts.c (common_handle_option): Handle it. * doc/invoke.texi (-ffp-contract): Document. * tree.h (fold_fma): Declare. * builtins.c (fold_fma): New function. (fold_builtin_fma): Likewise. (fold_builtin_3): Call it for fma. * fold-const.c (fold_ternary_loc): Fold FMA_EXPR. * optabs.c (optab_for_tree_code): Handle FMA_EXPR. * config/i386/sse.md (fms<mode>4, fnma<mode>, fnms<mode>4): New expanders. * doc/md.texi (fms<mode>4, fnma<mode>, fnms<mode>4): Document new named patterns. * genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab. * optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms. (fms_optab, fnma_optab, fnms_optab): New defines. * gimplify.c (gimplify_expr): Handle binary truth expressions explicitly. Handle FMA_EXPR. * tree-vect-stmts.c (vectorizable_operation): Handle ternary operations. * gcc.target/i386/fma4-vector-2.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@166304 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog36
-rw-r--r--gcc/builtins.c39
-rw-r--r--gcc/common.opt8
-rw-r--r--gcc/config/i386/sse.md29
-rw-r--r--gcc/doc/invoke.texi14
-rw-r--r--gcc/doc/md.texi30
-rw-r--r--gcc/expr.c45
-rw-r--r--gcc/flag-types.h7
-rw-r--r--gcc/fold-const.c21
-rw-r--r--gcc/genopinit.c3
-rw-r--r--gcc/gimple-pretty-print.c8
-rw-r--r--gcc/gimple.c3
-rw-r--r--gcc/gimplify.c30
-rw-r--r--gcc/optabs.c3
-rw-r--r--gcc/optabs.h6
-rw-r--r--gcc/opts.c12
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/fma4-vector-2.c21
-rw-r--r--gcc/tree-cfg.c14
-rw-r--r--gcc/tree-inline.c1
-rw-r--r--gcc/tree-ssa-math-opts.c158
-rw-r--r--gcc/tree-vect-stmts.c66
-rw-r--r--gcc/tree.def6
-rw-r--r--gcc/tree.h1
24 files changed, 524 insertions, 42 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7c903436186..7a3cdb0535c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+2010-11-04 Richard Guenther <rguenther@suse.de>
+ Richard Henderson <rth@redhat.com>
+
+ * tree.def (FMA_EXPR): New tree code.
+ * expr.c (expand_expr_real_2): Add FMA_EXPR expansion code.
+ * gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS.
+ * tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types.
+ * tree-inline.c (estimate_operator_cost): Handle FMA_EXPR.
+ * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
+ * tree-ssa-math-opts.c (convert_mult_to_fma): New function.
+ (execute_optimize_widening_mul): Call it. Reorganize to allow
+ dead stmt removal. Move TODO flags ...
+ (pass_optimize_widening_mul): ... here.
+ * flag-types.h (enum fp_contract_mode): New enum.
+ * common.opt (flag_fp_contract_mode): New variable.
+ (-ffp-contract): New option.
+ * opts.c (common_handle_option): Handle it.
+ * doc/invoke.texi (-ffp-contract): Document.
+ * tree.h (fold_fma): Declare.
+ * builtins.c (fold_fma): New function.
+ (fold_builtin_fma): Likewise.
+ (fold_builtin_3): Call it for fma.
+ * fold-const.c (fold_ternary_loc): Fold FMA_EXPR.
+ * optabs.c (optab_for_tree_code): Handle FMA_EXPR.
+ * config/i386/sse.md (fms<mode>4, fnma<mode>4, fnms<mode>4):
+ New expanders.
+ * doc/md.texi (fms<mode>4, fnma<mode>4, fnms<mode>4): Document new
+ named patterns.
+ * genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab.
+ * optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms.
+ (fms_optab, fnma_optab, fnms_optab): New defines.
+ * gimplify.c (gimplify_expr): Handle binary truth expressions
+ explicitly. Handle FMA_EXPR.
+ * tree-vect-stmts.c (vectorizable_operation): Handle ternary
+ operations.
+
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 31a869bf1f6..e193791ccc7 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -9266,6 +9266,40 @@ fold_builtin_abs (location_t loc, tree arg, tree type)
return fold_build1_loc (loc, ABS_EXPR, type, arg);
}
+/* Fold a fma operation with arguments ARG[012]. */
+
+tree
+fold_fma (location_t loc ATTRIBUTE_UNUSED,
+ tree type, tree arg0, tree arg1, tree arg2)
+{
+ if (TREE_CODE (arg0) == REAL_CST
+ && TREE_CODE (arg1) == REAL_CST
+ && TREE_CODE (arg2) == REAL_CST)
+ return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+
+ return NULL_TREE;
+}
+
+/* Fold a call to fma, fmaf, or fmal with arguments ARG[012]. */
+
+static tree
+fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+{
+ if (validate_arg (arg0, REAL_TYPE)
+ && validate_arg(arg1, REAL_TYPE)
+ && validate_arg(arg2, REAL_TYPE))
+ {
+ tree tem = fold_fma (loc, type, arg0, arg1, arg2);
+ if (tem)
+ return tem;
+
+ /* ??? Only expand to FMA_EXPR if it's directly supported. */
+ if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+ return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+ }
+ return NULL_TREE;
+}
+
/* Fold a call to builtin fmin or fmax. */
static tree
@@ -10540,10 +10574,7 @@ fold_builtin_3 (location_t loc, tree fndecl,
return fold_builtin_sincos (loc, arg0, arg1, arg2);
CASE_FLT_FN (BUILT_IN_FMA):
- if (validate_arg (arg0, REAL_TYPE)
- && validate_arg(arg1, REAL_TYPE)
- && validate_arg(arg2, REAL_TYPE))
- return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+ return fold_builtin_fma (loc, arg0, arg1, arg2, type);
break;
CASE_FLT_FN (BUILT_IN_REMQUO):
diff --git a/gcc/common.opt b/gcc/common.opt
index cd8b0adfff1..551c3358f75 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -58,6 +58,10 @@ bool flag_warn_unused_result = false
Variable
int *param_values
+; Floating-point contraction mode, fast by default.
+Variable
+enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST
+
###
Driver
@@ -857,6 +861,10 @@ fforward-propagate
Common Report Var(flag_forward_propagate) Optimization
Perform a forward propagation pass on RTL
+ffp-contract=
+Common Joined RejectNegative
+-ffp-contract=[off|on|fast] Perform floating-point expression contraction.
+
; Nonzero means don't put addresses of constant functions in registers.
; Used for compiling the Unix kernel, where strange substitutions are
; done on the assembly output.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c359aed0791..717f7fe7c5b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1859,7 +1859,7 @@
;; Intrinsic FMA operations.
-;; The standard name for fma is only available with SSE math enabled.
+;; The standard names for fma are only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
@@ -1869,6 +1869,33 @@
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
"")
+(define_expand "fms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+(define_expand "fnma<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+(define_expand "fnms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3aefa09c89f..fda884b76eb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -343,7 +343,7 @@ Objective-C and Objective-C++ Dialects}.
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol
-ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
--fforward-propagate -ffunction-sections @gol
+-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
-fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol
-finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol
@@ -5992,6 +5992,18 @@ loop unrolling.
This option is enabled by default at optimization levels @option{-O},
@option{-O2}, @option{-O3}, @option{-Os}.
+@item -ffp-contract=@var{style}
+@opindex ffp-contract
+@option{-ffp-contract=off} disables floating-point expression contraction.
+@option{-ffp-contract=fast} enables floating-point expression contraction
+such as forming of fused multiply-add operations if the target has
+native support for them.
+@option{-ffp-contract=on} enables floating-point expression contraction
+if allowed by the language standard. This is currently not implemented
+and is treated the same as @option{-ffp-contract=off}.
+
+The default is @option{-ffp-contract=fast}.
+
@item -fomit-frame-pointer
@opindex fomit-frame-pointer
Don't keep the frame pointer in a register for functions that
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6de4f3658a6..8418564d91c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3958,6 +3958,36 @@ pattern is used to implement the @code{fma}, @code{fmaf}, and
multiply followed by the add if the machine does not perform a
rounding step between the operations.
+@cindex @code{fms@var{m}4} instruction pattern
+@item @samp{fms@var{m}4}
+Like @code{fma@var{m}4}, except operand 3 is subtracted from the
+product instead of added to the product. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3}))
+@end smallexample
+
+@cindex @code{fnma@var{m}4} instruction pattern
+@item @samp{fnma@var{m}4}
+Like @code{fma@var{m}4} except that the intermediate product
+is negated before being added to operand 3. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3})
+@end smallexample
+
+@cindex @code{fnms@var{m}4} instruction pattern
+@item @samp{fnms@var{m}4}
+Like @code{fms@var{m}4} except that the intermediate product
+is negated before subtracting operand 3. This is represented
+in the rtl as
+
+@smallexample
+(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3}))
+@end smallexample
+
@cindex @code{min@var{m}3} instruction pattern
@cindex @code{max@var{m}3} instruction pattern
@item @samp{smin@var{m}3}, @samp{smax@var{m}3}
diff --git a/gcc/expr.c b/gcc/expr.c
index 56f6edaeda2..f29f6dc1244 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -7254,7 +7254,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
int ignore;
bool reduce_bit_field;
location_t loc = ops->location;
- tree treeop0, treeop1;
+ tree treeop0, treeop1, treeop2;
#define REDUCE_BIT_FIELD(expr) (reduce_bit_field \
? reduce_to_bit_field_precision ((expr), \
target, \
@@ -7267,6 +7267,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
treeop0 = ops->op0;
treeop1 = ops->op1;
+ treeop2 = ops->op2;
/* We should be called only on simple (binary or unary) expressions,
exactly those that are valid in gimple expressions that aren't
@@ -7624,7 +7625,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
- op2 = expand_normal (ops->op2);
+ op2 = expand_normal (treeop2);
target = expand_widen_pattern_expr (ops, op0, op1, op2,
target, unsignedp);
return target;
@@ -7711,6 +7712,46 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
+ case FMA_EXPR:
+ {
+ optab opt = fma_optab;
+ gimple def0, def2;
+
+ def0 = get_def_for_expr (treeop0, NEGATE_EXPR);
+ def2 = get_def_for_expr (treeop2, NEGATE_EXPR);
+
+ op0 = op2 = NULL;
+
+ if (def0 && def2
+ && optab_handler (fnms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnms_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+ else if (def0
+ && optab_handler (fnma_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fnma_optab;
+ op0 = expand_normal (gimple_assign_rhs1 (def0));
+ }
+ else if (def2
+ && optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ {
+ opt = fms_optab;
+ op2 = expand_normal (gimple_assign_rhs1 (def2));
+ }
+
+ if (op0 == NULL)
+ op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL);
+ if (op2 == NULL)
+ op2 = expand_normal (treeop2);
+ op1 = expand_normal (treeop1);
+
+ return expand_ternary_op (TYPE_MODE (type), opt,
+ op0, op1, op2, target, 0);
+ }
+
case MULT_EXPR:
/* If this is a fixed-point operation, then we cannot use the code
below because "expand_mult" doesn't support sat/no-sat fixed-point
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 1a8edec3a37..4259985b02a 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -152,4 +152,11 @@ enum warn_strict_overflow_code
WARN_STRICT_OVERFLOW_MAGNITUDE = 5
};
+/* Floating-point contraction mode. */
+enum fp_contract_mode {
+ FP_CONTRACT_OFF = 0,
+ FP_CONTRACT_ON = 1,
+ FP_CONTRACT_FAST = 2
+};
+
#endif /* ! GCC_FLAG_TYPES_H */
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index da890f14c3d..b6a9814fbe3 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13281,10 +13281,10 @@ contains_label_p (tree st)
tree
fold_ternary_loc (location_t loc, enum tree_code code, tree type,
- tree op0, tree op1, tree op2)
+ tree op0, tree op1, tree op2)
{
tree tem;
- tree arg0 = NULL_TREE, arg1 = NULL_TREE;
+ tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE;
enum tree_code_class kind = TREE_CODE_CLASS (code);
gcc_assert (IS_EXPR_CODE_CLASS (kind)
@@ -13312,6 +13312,12 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
STRIP_NOPS (arg1);
}
+ if (op2)
+ {
+ arg2 = op2;
+ STRIP_NOPS (arg2);
+ }
+
switch (code)
{
case COMPONENT_REF:
@@ -13610,6 +13616,17 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
return NULL_TREE;
+ case FMA_EXPR:
+ /* For integers we can decompose the FMA if possible. */
+ if (TREE_CODE (arg0) == INTEGER_CST
+ && TREE_CODE (arg1) == INTEGER_CST)
+ return fold_build2_loc (loc, PLUS_EXPR, type,
+ const_binop (MULT_EXPR, arg0, arg1), arg2);
+ if (integer_zerop (arg2))
+ return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1);
+
+ return fold_fma (loc, type, arg0, arg1, arg2);
+
default:
return NULL_TREE;
} /* switch (code) */
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 6e0a714f49e..eee9ef826da 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -160,6 +160,9 @@ static const char * const optabs[] =
"set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
"set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
"set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
+ "set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))",
+ "set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))",
+ "set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))",
"set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
"set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
"set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index c74dd0ec7a7..057f35b9815 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -400,6 +400,14 @@ dump_ternary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
pp_character (buffer, '>');
break;
+ case FMA_EXPR:
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, " * ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, " + ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ break;
+
default:
gcc_unreachable ();
}
diff --git a/gcc/gimple.c b/gcc/gimple.c
index 1bb241a62b3..6704456c0cc 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2529,7 +2529,8 @@ get_gimple_rhs_num_ops (enum tree_code code)
|| (SYM) == TRUTH_XOR_EXPR) ? GIMPLE_BINARY_RHS \
: (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \
: ((SYM) == WIDEN_MULT_PLUS_EXPR \
- || (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS \
+ || (SYM) == WIDEN_MULT_MINUS_EXPR \
+ || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \
: ((SYM) == COND_EXPR \
|| (SYM) == CONSTRUCTOR \
|| (SYM) == OBJ_TYPE_REF \
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 94a6689c61f..d5a633c1b5e 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7170,6 +7170,16 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
ret = gimplify_omp_atomic (expr_p, pre_p);
break;
+ case TRUTH_AND_EXPR:
+ case TRUTH_OR_EXPR:
+ case TRUTH_XOR_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_2;
+
+ case FMA_EXPR:
+ /* Classified as tcc_expression. */
+ goto expr_3;
+
case POINTER_PLUS_EXPR:
/* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
The second is gimple immediate saving a need for extra statement.
@@ -7249,16 +7259,28 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
break;
}
+ expr_3:
+ {
+ enum gimplify_status r0, r1, r2;
+
+ r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+ r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+ post_p, is_gimple_val, fb_rvalue);
+
+ ret = MIN (MIN (r0, r1), r2);
+ break;
+ }
+
case tcc_declaration:
case tcc_constant:
ret = GS_ALL_DONE;
goto dont_recalculate;
default:
- gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
- || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
- || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
- goto expr_2;
+ gcc_unreachable ();
}
recalculate_side_effects (*expr_p);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 5d095c1a9b9..a96eea1cdc8 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -374,6 +374,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
: (TYPE_SATURATING (type)
? ssmsub_widen_optab : smsub_widen_optab));
+ case FMA_EXPR:
+ return fma_optab;
+
case REDUC_MAX_EXPR:
return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 8b9c9a730e0..c4dfa60b83d 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -192,6 +192,9 @@ enum optab_index
OTI_atan2,
/* Floating multiply/add */
OTI_fma,
+ OTI_fms,
+ OTI_fnma,
+ OTI_fnms,
/* Move instruction. */
OTI_mov,
@@ -435,6 +438,9 @@ enum optab_index
#define pow_optab (&optab_table[OTI_pow])
#define atan2_optab (&optab_table[OTI_atan2])
#define fma_optab (&optab_table[OTI_fma])
+#define fms_optab (&optab_table[OTI_fms])
+#define fnma_optab (&optab_table[OTI_fnma])
+#define fnms_optab (&optab_table[OTI_fnms])
#define mov_optab (&optab_table[OTI_mov])
#define movstrict_optab (&optab_table[OTI_movstrict])
diff --git a/gcc/opts.c b/gcc/opts.c
index ce2618e04cc..b2019c67a38 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1901,6 +1901,18 @@ common_handle_option (struct gcc_options *opts,
return false;
break;
+ case OPT_ffp_contract_:
+ if (!strcmp (arg, "on"))
+ /* Not implemented, fall back to conservative FP_CONTRACT_OFF. */
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "off"))
+ flag_fp_contract_mode = FP_CONTRACT_OFF;
+ else if (!strcmp (arg, "fast"))
+ flag_fp_contract_mode = FP_CONTRACT_FAST;
+ else
+ error ("unknown floating point contraction style \"%s\"", arg);
+ break;
+
case OPT_fexcess_precision_:
if (!strcmp (arg, "fast"))
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e76da86e091..a025ce4ba94 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2010-11-04 Richard Guenther <rguenther@suse.de>
+ Richard Henderson <rth@redhat.com>
+
+ * gcc.target/i386/fma4-vector-2.c: New testcase.
+
2010-11-04 Artjoms Sinkarovs <artyom.shinakroff@gmail.com>
Richard Guenther <rguenther@suse.de>
diff --git a/gcc/testsuite/gcc.target/i386/fma4-vector-2.c b/gcc/testsuite/gcc.target/i386/fma4-vector-2.c
new file mode 100644
index 00000000000..2f3ec96dc96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma4-vector-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
+
+float r[256], s[256];
+float x[256];
+float y[256];
+float z[256];
+
+void foo (void)
+{
+ int i;
+ for (i = 0; i < 256; ++i)
+ {
+ r[i] = x[i] * y[i] - z[i];
+ s[i] = x[i] * y[i] + z[i];
+ }
+}
+
+/* { dg-final { scan-assembler "vfmaddps" } } */
+/* { dg-final { scan-assembler "vfmsubps" } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index e31a50d9466..3b46283e7e6 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3655,6 +3655,20 @@ verify_gimple_assign_ternary (gimple stmt)
}
break;
+ case FMA_EXPR:
+ if (!useless_type_conversion_p (lhs_type, rhs1_type)
+ || !useless_type_conversion_p (lhs_type, rhs2_type)
+ || !useless_type_conversion_p (lhs_type, rhs3_type))
+ {
+ error ("type mismatch in fused multiply-add expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+ break;
+
default:
gcc_unreachable ();
}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index cf8a68e9f93..88806beddd3 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3283,6 +3283,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
case POINTER_PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ case FMA_EXPR:
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index a814f6f0288..96140f06f63 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1494,6 +1494,123 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
return true;
}
+/* Combine the multiplication at MUL_STMT with uses in additions and
+ subtractions to form fused multiply-add operations. Returns true
+ if successful and MUL_STMT should be removed. */
+
+static bool
+convert_mult_to_fma (gimple mul_stmt)
+{
+ tree mul_result = gimple_assign_lhs (mul_stmt);
+ tree type = TREE_TYPE (mul_result);
+ gimple use_stmt, fma_stmt;
+ use_operand_p use_p;
+ imm_use_iterator imm_iter;
+
+ if (FLOAT_TYPE_P (type)
+ && flag_fp_contract_mode == FP_CONTRACT_OFF)
+ return false;
+
+ /* We don't want to do bitfield reduction ops. */
+ if (INTEGRAL_TYPE_P (type)
+ && (TYPE_PRECISION (type)
+ != GET_MODE_PRECISION (TYPE_MODE (type))))
+ return false;
+
+ /* If the target doesn't support it, don't generate it. We assume that
+ if fma isn't available then fms, fnma or fnms are not either. */
+ if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* Make sure that the multiplication statement becomes dead after
+ the transformation, i.e. that all uses are transformed to FMAs.
+ This means we assume that an FMA operation has the same cost
+ as an addition. */
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
+ {
+ enum tree_code use_code;
+
+ use_stmt = USE_STMT (use_p);
+
+ if (!is_gimple_assign (use_stmt))
+ return false;
+ use_code = gimple_assign_rhs_code (use_stmt);
+ /* ??? We need to handle NEGATE_EXPR to eventually form fnms. */
+ if (use_code != PLUS_EXPR
+ && use_code != MINUS_EXPR)
+ return false;
+
+ /* For now restrict this operation to single basic blocks. In theory
+ we would want to support sinking the multiplication in
+ m = a*b;
+ if ()
+ ma = m + c;
+ else
+ d = m;
+ to form a fma in the then block and sink the multiplication to the
+ else block. */
+ if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
+ return false;
+
+ /* We can't handle a * b + a * b. */
+ if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+ return false;
+
+ /* If the target doesn't support a * b - c then drop the ball. */
+ if (gimple_assign_rhs1 (use_stmt) == mul_result
+ && use_code == MINUS_EXPR
+ && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* If the target doesn't support -a * b + c then drop the ball. */
+ if (gimple_assign_rhs2 (use_stmt) == mul_result
+ && use_code == MINUS_EXPR
+ && optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+ return false;
+
+ /* We don't generate -a * b - c below yet. */
+ }
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
+ {
+ tree addop, mulop1;
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
+
+ mulop1 = gimple_assign_rhs1 (mul_stmt);
+ if (gimple_assign_rhs1 (use_stmt) == mul_result)
+ {
+ addop = gimple_assign_rhs2 (use_stmt);
+ /* a * b - c -> a * b + (-c) */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ addop = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, addop),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+ else
+ {
+ addop = gimple_assign_rhs1 (use_stmt);
+ /* a - b * c -> (-b) * c + a */
+ if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ mulop1 = force_gimple_operand_gsi (&gsi,
+ build1 (NEGATE_EXPR,
+ type, mulop1),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ }
+
+ fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
+ gimple_assign_lhs (use_stmt),
+ mulop1,
+ gimple_assign_rhs2 (mul_stmt),
+ addop);
+ gsi_replace (&gsi, fma_stmt, true);
+ }
+
+ return true;
+}
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
@@ -1501,31 +1618,45 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
static unsigned int
execute_optimize_widening_mul (void)
{
- bool changed = false;
basic_block bb;
FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple stmt = gsi_stmt (gsi);
enum tree_code code;
- if (!is_gimple_assign (stmt))
- continue;
+ if (is_gimple_assign (stmt))
+ {
+ code = gimple_assign_rhs_code (stmt);
+ switch (code)
+ {
+ case MULT_EXPR:
+ if (!convert_mult_to_widen (stmt)
+ && convert_mult_to_fma (stmt))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
+ }
+ break;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ convert_plusminus_to_widen (&gsi, stmt, code);
+ break;
- code = gimple_assign_rhs_code (stmt);
- if (code == MULT_EXPR)
- changed |= convert_mult_to_widen (stmt);
- else if (code == PLUS_EXPR || code == MINUS_EXPR)
- changed |= convert_plusminus_to_widen (&gsi, stmt, code);
+ default:;
+ }
+ }
+ gsi_next (&gsi);
}
}
- return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts : 0);
+ return 0;
}
static bool
@@ -1549,6 +1680,9 @@ struct gimple_opt_pass pass_optimize_widening_mul =
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- 0 /* todo_flags_finish */
+ TODO_verify_ssa
+ | TODO_verify_stmts
+ | TODO_dump_func
+ | TODO_update_ssa /* todo_flags_finish */
}
};
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 4961ccbccc7..2dbc0353421 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2343,7 +2343,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
/* Function vectorizable_operation.
- Check if STMT performs a binary or unary operation that can be vectorized.
+ Check if STMT performs a binary, unary or ternary operation that can
+ be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at BSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
@@ -2354,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
{
tree vec_dest;
tree scalar_dest;
- tree op0, op1 = NULL;
+ tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -2366,7 +2367,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
int icode;
tree def;
gimple def_stmt;
- enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
+ enum vect_def_type dt[3]
+ = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info;
int nunits_in;
@@ -2374,8 +2376,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vectype_out;
int ncopies;
int j, i;
- VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
- tree vop0, vop1;
+ VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
+ tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
@@ -2401,10 +2403,11 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Support only unary or binary operations. */
op_type = TREE_CODE_LENGTH (code);
- if (op_type != unary_op && op_type != binary_op)
+ if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
+ fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
+ op_type);
return false;
}
@@ -2441,7 +2444,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (nunits_out != nunits_in)
return false;
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
{
op1 = gimple_assign_rhs2 (stmt);
if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
@@ -2452,6 +2455,17 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
return false;
}
}
+ if (op_type == ternary_op)
+ {
+ op2 = gimple_assign_rhs3 (stmt);
+ if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
+ &dt[2]))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
if (loop_vinfo)
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -2473,7 +2487,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR)
return false;
- optab = optab_for_tree_code (code, vectype, optab_default);
+ optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */
if (!optab)
@@ -2534,8 +2548,10 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (!slp_node)
{
vec_oprnds0 = VEC_alloc (tree, heap, 1);
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
+ if (op_type == ternary_op)
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
}
/* In case the vectorization factor (VF) is bigger than the number
@@ -2597,22 +2613,40 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Handle uses. */
if (j == 0)
{
- if (op_type == binary_op)
+ if (op_type == binary_op || op_type == ternary_op)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
+ if (op_type == ternary_op)
+ {
+ vec_oprnds2 = VEC_alloc (tree, heap, 1);
+ VEC_quick_push (tree, vec_oprnds2,
+ vect_get_vec_def_for_operand (op2, stmt, NULL));
+ }
}
else
- vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
+ {
+ vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
+ if (op_type == ternary_op)
+ {
+ tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
+ VEC_quick_push (tree, vec_oprnds2,
+ vect_get_vec_def_for_stmt_copy (dt[2],
+ vec_oprnd));
+ }
+ }
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
{
- vop1 = ((op_type == binary_op)
- ? VEC_index (tree, vec_oprnds1, i) : NULL);
- new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
+ vop1 = ((op_type == binary_op || op_type == ternary_op)
+ ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
+ vop2 = ((op_type == ternary_op)
+ ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
+ new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
+ vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2633,6 +2667,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
VEC_free (tree, heap, vec_oprnds0);
if (vec_oprnds1)
VEC_free (tree, heap, vec_oprnds1);
+ if (vec_oprnds2)
+ VEC_free (tree, heap, vec_oprnds2);
return true;
}
diff --git a/gcc/tree.def b/gcc/tree.def
index 24729e8180d..791d699a0c5 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1092,6 +1092,12 @@ DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
is subtracted from t3. */
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3)
+/* Fused multiply-add.
+ All operands and the result are of the same type. No intermediate
+ rounding is performed after multiplying operand one with operand two
+ before adding operand three. */
+DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
+
/* Whole vector left/right shift in bits.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits. */
diff --git a/gcc/tree.h b/gcc/tree.h
index 2de56c78735..2392ada9897 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4954,6 +4954,7 @@ extern void fold_defer_overflow_warnings (void);
extern void fold_undefer_overflow_warnings (bool, const_gimple, int);
extern void fold_undefer_and_ignore_overflow_warnings (void);
extern bool fold_deferring_overflow_warnings_p (void);
+extern tree fold_fma (location_t, tree, tree, tree, tree);
enum operand_equal_flag
{