Support fused multiply-adds in fully-masked reductions

This patch adds support for fusing a conditional add or subtract with a multiplication, so that we can use fused multiply-add and multiply-subtract operations for fully-masked reductions. All current conditional X operations have the form "do X or don't do X to the first operand" (add/don't add to first operand, etc.). However, the FMA optabs and functions are ordered so that the accumulator comes last. There were two obvious ways of resolving this: break the convention for conditional operators and have "add/don't add to the final operand" or break the convention for FMA and put the accumulator first. The patch goes for the latter, but adds _REV to make it obvious that the operands are in a different order.
author: Richard Sandiford <richard.sandiford@linaro.org> 2017-03-30 11:38:10 +0000
committer: Richard Sandiford <richard.sandiford@linaro.org> 2017-11-20 16:01:23 +0000
commit: c9ef88a25bd1e416d31af8a7e184dc07a8e67006 (patch)
tree: 7f95dbd5b7e18e654266932975e3c781bc358e85
parent: 164804dbc17d5bf70634127b342e221cda938b6b (diff)
download: gcc-c9ef88a25bd1e416d31af8a7e184dc07a8e67006.tar.gz
11 files changed, 223 insertions, 70 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index ebf522726f4..7df6445e4ca 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1629,7 +1629,7 @@
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (match_operand:SVE_I 2 "register_operand" "0")
 	   (match_operand:SVE_I 3 "register_operand" "w")]
-	  SVE_COND_INT_OP))]
+	  SVE_COND_INT2_OP))]
   "TARGET_SVE"
   "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
@@ -2385,11 +2385,23 @@
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (match_operand:SVE_F 2 "register_operand" "0")
 	   (match_operand:SVE_F 3 "register_operand" "w")]
-	  SVE_COND_FP_OP))]
+	  SVE_COND_FP2_OP))]
   "TARGET_SVE"
   "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 )
 
+(define_insn "cond_<optab><mode>"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (match_operand:SVE_F 2 "register_operand" "0")
+	   (match_operand:SVE_F 3 "register_operand" "w")
+	   (match_operand:SVE_F 4 "register_operand" "w")]
+	  SVE_COND_FP3_OP))]
+  "TARGET_SVE"
+  "<sve_fp_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+)
+
 (define_insn "*<optab><mode>3_cond"
   [(set (match_operand:SVE_F 0 "register_operand" "=w")
 	(sve_predicated_comm_fp_op:SVE_F
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index df5965d7fac..e48a6dd99d3 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -430,6 +430,8 @@
     UNSPEC_COND_AND	; Used in aarch64-sve.md.
     UNSPEC_COND_ORR	; Used in aarch64-sve.md.
     UNSPEC_COND_EOR	; Used in aarch64-sve.md.
+    UNSPEC_COND_FMLA	; Used in aarch64-sve.md.
+    UNSPEC_COND_FMLS	; Used in aarch64-sve.md.
     UNSPEC_COND_LT	; Used in aarch64-sve.md.
     UNSPEC_COND_LE	; Used in aarch64-sve.md.
     UNSPEC_COND_EQ	; Used in aarch64-sve.md.
@@ -1433,14 +1435,16 @@
 
 (define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])
 
-(define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
-				      UNSPEC_COND_SMAX UNSPEC_COND_UMAX
-				      UNSPEC_COND_SMIN UNSPEC_COND_UMIN
-				      UNSPEC_COND_AND
-				      UNSPEC_COND_ORR
-				      UNSPEC_COND_EOR])
+(define_int_iterator SVE_COND_INT2_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
+				       UNSPEC_COND_SMAX UNSPEC_COND_UMAX
+				       UNSPEC_COND_SMIN UNSPEC_COND_UMIN
+				       UNSPEC_COND_AND
+				       UNSPEC_COND_ORR
+				       UNSPEC_COND_EOR])
 
-(define_int_iterator SVE_COND_FP_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+(define_int_iterator SVE_COND_FP2_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+
+(define_int_iterator SVE_COND_FP3_OP [UNSPEC_COND_FMLA UNSPEC_COND_FMLS])
 
 (define_int_iterator SVE_COND_INT_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
 				       UNSPEC_COND_EQ UNSPEC_COND_NE
@@ -1483,7 +1487,9 @@
 			(UNSPEC_COND_UMIN "umin")
 			(UNSPEC_COND_AND "and")
 			(UNSPEC_COND_ORR "ior")
-			(UNSPEC_COND_EOR "xor")])
+			(UNSPEC_COND_EOR "xor")
+			(UNSPEC_COND_FMLA "fma_rev")
+			(UNSPEC_COND_FMLS "fnma_rev")])
 
 (define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
 			      (UNSPEC_UMINV "umin")
@@ -1702,4 +1708,6 @@
 			     (UNSPEC_COND_EOR "eor")])
 
 (define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
-			    (UNSPEC_COND_SUB "fsub")])
+			    (UNSPEC_COND_SUB "fsub")
+			    (UNSPEC_COND_FMLA "fmla")
+			    (UNSPEC_COND_FMLS "fmls")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 98cf869a09f..02248364359 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6416,6 +6416,32 @@ be in a normal C @samp{?:} condition.
 Operands 0, 2 and 3 all have mode @var{m}, while operand 1 has the mode
 returned by @code{TARGET_VECTORIZE_GET_MASK_MODE}.
 
+@cindex @code{cond_fma_rev@var{mode}} instruction pattern
+@item @samp{cond_fma_rev@var{mode}}
+Similar to @samp{cond_add@var{m}}, but compute:
+@smallexample
+op0 = op1 ? fma (op3, op4, op2) : op2;
+@end smallexample
+for scalars and:
+@smallexample
+op0[I] = op1[I] ? fma (op3[I], op4[I], op2[I]) : op2[I];
+@end smallexample
+for vectors.  The @samp{_rev} indicates that the addend (operand 2)
+comes first.
+
+@cindex @code{cond_fnma_rev@var{mode}} instruction pattern
+@item @samp{cond_fnma_rev@var{mode}}
+Similar to @samp{cond_fma_rev@var{m}}, but negate operand 3 before
+multiplying it.  That is, the instruction performs:
+@smallexample
+op0 = op1 ? fma (-op3, op4, op2) : op2;
+@end smallexample
+for scalars and:
+@smallexample
+op0[I] = op1[I] ? fma (-op3[I], op4[I], op2[I]) : op2[I];
+@end smallexample
+for vectors.
+
 @cindex @code{neg@var{mode}cc} instruction pattern
 @item @samp{neg@var{mode}cc}
 Similar to @samp{mov@var{mode}cc} but for conditional negation.  Conditionally
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 06f94ee0dc1..ddbf4291479 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -485,6 +485,10 @@ commutative_op (id_base *id)
       case CFN_FNMS:
 	return 0;
 
+      case CFN_COND_FMA_REV:
+      case CFN_COND_FNMA_REV:
+	return 2;
+
       default:
 	return -1;
       }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index d0e5919a760..a3c252511d0 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -93,6 +93,7 @@ init_internal_fns ()
 #define ternary_direct { 0, 0, true }
 #define cond_unary_direct { 1, 1, true }
 #define cond_binary_direct { 1, 1, true }
+#define cond_ternary_direct { 1, 1, true }
 #define while_direct { 0, 2, false }
 #define fold_extract_direct { 2, 2, false }
 #define firstfault_load_direct { -1, -1, false }
@@ -2964,6 +2965,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 #define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 3)
 
+#define expand_cond_ternary_optab_fn(FN, STMT, OPTAB) \
+  expand_direct_optab_fn (FN, STMT, OPTAB, 4)
+
 #define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 3)
 
@@ -3043,6 +3047,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_ternary_optab_supported_p direct_optab_supported_p
 #define direct_cond_unary_optab_supported_p direct_optab_supported_p
 #define direct_cond_binary_optab_supported_p direct_optab_supported_p
+#define direct_cond_ternary_optab_supported_p direct_optab_supported_p
 #define direct_mask_load_optab_supported_p direct_optab_supported_p
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d1f8818bb00..742038627ba 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -58,7 +58,8 @@ along with GCC; see the file COPYING3.  If not see
    - binary: a normal binary optab, such as vec_interleave_lo_<mode>
    - ternary: a normal ternary optab, such as fma<mode>4
 
-   - cond_binary: a conditional binary optab, such as add<mode>cc
+   - cond_binary: a conditional binary optab, such as cond_add<mode>
+   - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode>
 
    DEF_INTERNAL_COND_OPTAB_FN defines a conditional function COND_<NAME>,
    with optab cond_<OPTAB> and type cond_<TYPE>.  All these functions
@@ -152,6 +153,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_FMA_REV, ECF_CONST, cond_fma_rev, cond_ternary)
+DEF_INTERNAL_OPTAB_FN (COND_FNMA_REV, ECF_CONST, cond_fnma_rev, cond_ternary)
+
 DEF_INTERNAL_COND_OPTAB_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_OPTAB_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_OPTAB_FN (SMIN, ECF_CONST, smin, binary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ee81ef7a34e..b507b2eb671 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -222,6 +222,8 @@ OPTAB_D (notcc_optab, "not$acc")
 OPTAB_D (movcc_optab, "mov$acc")
 OPTAB_D (cond_add_optab, "cond_add$a")
 OPTAB_D (cond_sub_optab, "cond_sub$a")
+OPTAB_D (cond_fma_rev_optab, "cond_fma_rev$a")
+OPTAB_D (cond_fnma_rev_optab, "cond_fnma_rev$a")
 OPTAB_D (cond_and_optab, "cond_and$a")
 OPTAB_D (cond_ior_optab, "cond_ior$a")
 OPTAB_D (cond_xor_optab, "cond_xor$a")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c
new file mode 100644
index 00000000000..9e997adedca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_4.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
+
+double
+f (double *restrict a, double *restrict b, int *lookup)
+{
+  double res = 0.0;
+  for (int i = 0; i < 512; ++i)
+    res += a[lookup[i]] * b[i];
+  return res;
+}
+
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m, } 2 } } */
+/* Check that the predicated vector FMLAs are the only FMLA instructions.  */
+/* { dg-final { scan-assembler-times {\tfmla\t} 2 } } */
+/* { dg-final { scan-assembler-not {\tfadd\t} } } */
+/* { dg-final { scan-assembler-times {\tfaddv\td0,} 1 } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c
new file mode 100644
index 00000000000..e1f72941de4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_6.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
+
+#define REDUC(TYPE)						\
+  TYPE reduc_##TYPE (TYPE *x, TYPE *y, int count)		\
+  {								\
+    TYPE sum = 0;						\
+    for (int i = 0; i < count; ++i)				\
+      sum += x[i] * y[i];					\
+    return sum;							\
+  }
+
+REDUC (float)
+REDUC (double)
+
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c
new file mode 100644
index 00000000000..851f52d2cbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_7.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
+
+#define REDUC(TYPE)						\
+  TYPE reduc_##TYPE (TYPE *x, TYPE *y, int count)		\
+  {								\
+    TYPE sum = 0;						\
+    for (int i = 0; i < count; ++i)				\
+      sum -= x[i] * y[i];					\
+    return sum;							\
+  }
+
+REDUC (float)
+REDUC (double)
+
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m} 1 } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index ca2e6e2e098..a600516ded8 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3552,6 +3552,27 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
   return true;
 }
 
+/* If STMT is a call to IFN_COND_{ADD,SUB}, return the equivalent
+   fused multiply-add/subtract function, otherwise return IFN_LAST.  */
+
+static internal_fn
+fused_cond_internal_fn (gimple *stmt)
+{
+  gcall *call = dyn_cast <gcall *> (stmt);
+  if (!call || !gimple_call_internal_p (call))
+    return IFN_LAST;
+
+  switch (gimple_call_internal_fn (call))
+    {
+    case IFN_COND_ADD:
+      return IFN_COND_FMA_REV;
+    case IFN_COND_SUB:
+      return IFN_COND_FNMA_REV;
+    default:
+      return IFN_LAST;
+    }
+}
+
 /* gimple_fold callback that "valueizes" everything.  */
 
 static tree
@@ -3601,7 +3622,6 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
      as an addition.  */
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
     {
-      enum tree_code use_code;
       tree result = mul_result;
       bool negate_p = false;
 
@@ -3622,13 +3642,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
 	return false;
 
-      if (!is_gimple_assign (use_stmt))
-	return false;
-
-      use_code = gimple_assign_rhs_code (use_stmt);
-
       /* A negate on the multiplication leads to FNMA.  */
-      if (use_code == NEGATE_EXPR)
+      if (is_gimple_assign (use_stmt)
+	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
 	{
 	  ssa_op_iter iter;
 	  use_operand_p usep;
@@ -3650,51 +3666,60 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
 	  use_stmt = neguse_stmt;
 	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
 	    return false;
-	  if (!is_gimple_assign (use_stmt))
-	    return false;
 
-	  use_code = gimple_assign_rhs_code (use_stmt);
 	  negate_p = true;
 	}
 
-      switch (use_code)
-	{
-	case MINUS_EXPR:
-	  if (gimple_assign_rhs2 (use_stmt) == result)
-	    negate_p = !negate_p;
-	  break;
-	case PLUS_EXPR:
-	  break;
-	default:
-	  /* FMA can only be formed from PLUS and MINUS.  */
-	  return false;
-	}
-
-      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
-	 by a MULT_EXPR that we'll visit later, we might be able to
-	 get a more profitable match with fnma.
-	 OTOH, if we don't, a negate / fma pair has likely lower latency
-	 that a mult / subtract pair.  */
-      if (use_code == MINUS_EXPR && !negate_p
-	  && gimple_assign_rhs1 (use_stmt) == result
-	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
-	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type))
+      if (gassign *assign = dyn_cast <gassign *> (use_stmt))
 	{
-	  tree rhs2 = gimple_assign_rhs2 (use_stmt);
-
-	  if (TREE_CODE (rhs2) == SSA_NAME)
+	  switch (gimple_assign_rhs_code (assign))
 	    {
-	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
-	      if (has_single_use (rhs2)
-		  && is_gimple_assign (stmt2)
-		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
+	    case MINUS_EXPR:
+	      if (gimple_assign_rhs2 (use_stmt) == result)
+		negate_p = !negate_p;
+	      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is
+		 computed by a MULT_EXPR that we'll visit later, we
+		 might be able to get a more profitable match with fnma.
+		 OTOH, if we don't, a negate / fma pair has likely lower
+		 latency than a mult / subtract pair.  */
+	      else if (!negate_p
+		       && !direct_internal_fn_supported_p (IFN_FMS, type,
+							   opt_type)
+		       && direct_internal_fn_supported_p (IFN_FNMA, type,
+							  opt_type))
+		{
+		  tree rhs2 = gimple_assign_rhs2 (use_stmt);
+		  if (TREE_CODE (rhs2) == SSA_NAME)
+		    {
+		      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
+		      if (has_single_use (rhs2)
+			  && is_gimple_assign (stmt2)
+			  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
+			return false;
+		    }
+		}
+	      break;
+	    case PLUS_EXPR:
+	      break;
+	    default:
+	      /* FMA can only be formed from PLUS and MINUS.  */
 	      return false;
 	    }
-	}
 
-      /* We can't handle a * b + a * b.  */
-      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
-	return false;
+	  /* We can't handle a * b + a * b.  */
+	  if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
+	    return false;
+	}
+      else
+	{
+	  internal_fn ifn = fused_cond_internal_fn (use_stmt);
+	  if (ifn == IFN_LAST)
+	    return false;
+	  if (result != gimple_call_arg (use_stmt, 2))
+	    return false;
+	  if (!direct_internal_fn_supported_p (ifn, type, opt_type))
+	    return false;
+	}
 
       /* While it is possible to validate whether or not the exact form
 	 that we've recognized is available in the backend, the assumption
@@ -3709,7 +3734,6 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
     {
       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
-      enum tree_code use_code;
       tree addop, mulop1 = op1, result = mul_result;
       bool negate_p = false;
       gimple_seq seq = NULL;
@@ -3717,8 +3741,8 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
       if (is_gimple_debug (use_stmt))
 	continue;
 
-      use_code = gimple_assign_rhs_code (use_stmt);
-      if (use_code == NEGATE_EXPR)
+      if (is_gimple_assign (use_stmt)
+	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
 	{
 	  result = gimple_assign_lhs (use_stmt);
 	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
@@ -3727,23 +3751,33 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
 
 	  use_stmt = neguse_stmt;
 	  gsi = gsi_for_stmt (use_stmt);
-	  use_code = gimple_assign_rhs_code (use_stmt);
 	  negate_p = true;
 	}
 
-      if (gimple_assign_rhs1 (use_stmt) == result)
+      internal_fn ifn;
+      if (gassign *assign = dyn_cast <gassign *> (use_stmt))
 	{
-	  addop = gimple_assign_rhs2 (use_stmt);
-	  /* a * b - c -> a * b + (-c)  */
-	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
-	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
+	  ifn = IFN_FMA;
+	  if (gimple_assign_rhs1 (assign) == result)
+	    {
+	      addop = gimple_assign_rhs2 (assign);
+	      /* a * b - c -> a * b + (-c)  */
+	      if (gimple_assign_rhs_code (assign) == MINUS_EXPR)
+		addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
+	    }
+	  else
+	    {
+	      addop = gimple_assign_rhs1 (assign);
+	      /* a - b * c -> (-b) * c + a */
+	      if (gimple_assign_rhs_code (assign) == MINUS_EXPR)
+		negate_p = !negate_p;
+	    }
 	}
       else
 	{
-	  addop = gimple_assign_rhs1 (use_stmt);
-	  /* a - b * c -> (-b) * c + a */
-	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
-	    negate_p = !negate_p;
+	  ifn = fused_cond_internal_fn (use_stmt);
+	  gcc_assert (ifn != IFN_LAST);
+	  addop = gimple_call_arg (use_stmt, 1);
 	}
 
       if (negate_p)
@@ -3751,8 +3785,14 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
 
       if (seq)
 	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
-      fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
-      gimple_call_set_lhs (fma_stmt, gimple_assign_lhs (use_stmt));
+
+      if (ifn == IFN_FMA)
+	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
+      else
+	fma_stmt = gimple_build_call_internal (ifn, 4,
+					       gimple_call_arg (use_stmt, 0),
+					       addop, mulop1, op2);
+      gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (use_stmt));
       gsi_replace (&gsi, fma_stmt, true);
       /* Valueize aggressively so that we generate FMS, FNMA and FNMS
author	Richard Sandiford <richard.sandiford@linaro.org>	2017-03-30 11:38:10 +0000
committer	Richard Sandiford <richard.sandiford@linaro.org>	2017-11-20 16:01:23 +0000
commit	c9ef88a25bd1e416d31af8a7e184dc07a8e67006 (patch)
tree	7f95dbd5b7e18e654266932975e3c781bc358e85
parent	164804dbc17d5bf70634127b342e221cda938b6b (diff)
download	gcc-c9ef88a25bd1e416d31af8a7e184dc07a8e67006.tar.gz