diff options
author | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-07-20 16:16:53 +0000 |
---|---|---|
committer | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-07-20 16:16:53 +0000 |
commit | b58102e2496af586474635cef0981bd23d965d4a (patch) | |
tree | ea61f50fc075e518e0f6943f421bc5f9d8ee6666 /gcc | |
parent | 18540031a8b2070a56f5b7e94d6b24e8bd335c57 (diff) | |
download | gcc-b58102e2496af586474635cef0981bd23d965d4a.tar.gz |
Allow 4 operand FMAs on power7
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@176522 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 188 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/recip-3.c | 4 |
7 files changed, 181 insertions, 58 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b2f7e19c356..eb02423e54c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2011-07-20 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/vsx.md (vsx_fma*): Use 4 argument fma instructions + where we can use them from the standard and altivec instruction + sets, instead of always using the 3 operand VSX forms that require + the destination to overlap one of the inputs. + (vsx_fms*): Ditto. + (vsx_fnma*): Ditto. + (vsx_fnms*): Ditto. + + * config/rs6000/rs6000.md (fmadf4_fpr): Set fp_type fp_maddsub_d + for DF types. + (fmsdf4_fpr): Ditto. + (nfmadf4_fpr): Ditto. + (nfmsdf4_fpr): Ditto. + 2011-07-20 Sandra Loosemore <sandra@codesourcery.com> * genrecog.c (make_insn_sequence): Correct position numbering diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b34b70a27f4..288f291162b 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6288,7 +6288,7 @@ && VECTOR_UNIT_NONE_P (DFmode)" "{fma|fmadd} %0,%1,%2,%3" [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) + (set_attr "fp_type" "fp_maddsub_d")]) (define_insn "*fmsdf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") @@ -6299,7 +6299,7 @@ && VECTOR_UNIT_NONE_P (DFmode)" "{fms|fmsub} %0,%1,%2,%3" [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) + (set_attr "fp_type" "fp_maddsub_d")]) (define_insn "*nfmadf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") @@ -6310,7 +6310,7 @@ && VECTOR_UNIT_NONE_P (DFmode)" "{fnma|fnmadd} %0,%1,%2,%3" [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) + (set_attr "fp_type" "fp_maddsub_d")]) (define_insn "*nfmsdf4_fpr" [(set (match_operand:DF 0 "gpc_reg_operand" "=f") @@ -6321,7 +6321,7 @@ && VECTOR_UNIT_NONE_P (DFmode)" "{fnms|fnmsub} %0,%1,%2,%3" [(set_attr "type" "fp") - (set_attr "fp_type" "fp_maddsub_s")]) + (set_attr "fp_type" "fp_maddsub_d")]) (define_expand "sqrtdf2" [(set (match_operand:DF 0 
"gpc_reg_operand" "") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index b4d1e8b7509..e859af36e11 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -524,46 +524,112 @@ [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) -;; Fused vector multiply/add instructions - -(define_insn "*vsx_fma<mode>4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") - (fma:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0") - (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))] - "VECTOR_UNIT_VSX_P (<MODE>mode)" +;; Fused vector multiply/add instructions. Support the classical DF versions of +;; fma, which allows the target to be a separate register from the 3 inputs. +;; Under VSX, the target must be either the addend or the first multiply. +;; Where we can, also do the same for the Altivec V4SF fmas. + +(define_insn "*vsx_fmadf4" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") + (fma:DF + (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") + (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") + (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))] + "VECTOR_UNIT_VSX_P (DFmode)" "@ - x<VSv>madda<VSs> %x0,%x1,%x2 - x<VSv>maddm<VSs> %x0,%x1,%x3 - x<VSv>madda<VSs> %x0,%x1,%x2 - x<VSv>maddm<VSs> %x0,%x1,%x3" - [(set_attr "type" "<VStype_mul>") - (set_attr "fp_type" "<VSfptype_mul>")]) + xsmaddadp %x0,%x1,%x2 + xsmaddmdp %x0,%x1,%x3 + xsmaddadp %x0,%x1,%x2 + xsmaddmdp %x0,%x1,%x3 + {fma|fmadd} %0,%1,%2,%3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_d")]) + +(define_insn "*vsx_fmav4sf4" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v") + (fma:V4SF + (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v") + (match_operand:V4SF 3 
"vsx_register_operand" "0,ws,0,wa,v")))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "@ + xvmaddasp %x0,%x1,%x2 + xvmaddmsp %x0,%x1,%x3 + xvmaddasp %x0,%x1,%x2 + xvmaddmsp %x0,%x1,%x3 + vmaddfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_fmav2df4" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa") + (fma:V2DF + (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0") + (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "@ + xvmaddadp %x0,%x1,%x2 + xvmaddmdp %x0,%x1,%x3 + xvmaddadp %x0,%x1,%x2 + xvmaddmdp %x0,%x1,%x3" + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_fmsdf4" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") + (fma:DF + (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") + (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") + (neg:DF + (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))] + "VECTOR_UNIT_VSX_P (DFmode)" + "@ + xsmsubadp %x0,%x1,%x2 + xsmsubmdp %x0,%x1,%x3 + xsmsubadp %x0,%x1,%x2 + xsmsubmdp %x0,%x1,%x3 + {fms|fmsub} %0,%1,%2,%3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_d")]) (define_insn "*vsx_fms<mode>4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") - (fma:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0") - (neg:VSX_B - (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") + (fma:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0") + (neg:VSX_F + (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "@ x<VSv>msuba<VSs> %x0,%x1,%x2 x<VSv>msubm<VSs> %x0,%x1,%x3 x<VSv>msuba<VSs> 
%x0,%x1,%x2 x<VSv>msubm<VSs> %x0,%x1,%x3" - [(set_attr "type" "<VStype_mul>") - (set_attr "fp_type" "<VSfptype_mul>")]) + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_nfmadf4" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") + (neg:DF + (fma:DF + (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d") + (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") + (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))] + "VECTOR_UNIT_VSX_P (DFmode)" + "@ + xsnmaddadp %x0,%x1,%x2 + xsnmaddmdp %x0,%x1,%x3 + xsnmaddadp %x0,%x1,%x2 + xsnmaddmdp %x0,%x1,%x3 + {fnma|fnmadd} %0,%1,%2,%3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_d")]) (define_insn "*vsx_nfma<mode>4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") - (neg:VSX_B - (fma:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0") - (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] + [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") + (neg:VSX_F + (fma:VSX_F + (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa") + (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0") + (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "@ x<VSv>nmadda<VSs> %x0,%x1,%x2 @@ -573,22 +639,56 @@ [(set_attr "type" "<VStype_mul>") (set_attr "fp_type" "<VSfptype_mul>")]) -(define_insn "*vsx_nfms<mode>4" - [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") - (neg:VSX_B - (fma:VSX_B - (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa") - (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0") - (neg:VSX_B - (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))] - "VECTOR_UNIT_VSX_P (<MODE>mode)" +(define_insn "*vsx_nfmsdf4" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d") + (neg:DF + 
(fma:DF + (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d") + (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d") + (neg:DF + (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))] + "VECTOR_UNIT_VSX_P (DFmode)" "@ - x<VSv>nmsuba<VSs> %x0,%x1,%x2 - x<VSv>nmsubm<VSs> %x0,%x1,%x3 - x<VSv>nmsuba<VSs> %x0,%x1,%x2 - x<VSv>nmsubm<VSs> %x0,%x1,%x3" - [(set_attr "type" "<VStype_mul>") - (set_attr "fp_type" "<VSfptype_mul>")]) + xsnmsubadp %x0,%x1,%x2 + xsnmsubmdp %x0,%x1,%x3 + xsnmsubadp %x0,%x1,%x2 + xsnmsubmdp %x0,%x1,%x3 + {fnms|fnmsub} %0,%1,%2,%3" + [(set_attr "type" "fp") + (set_attr "fp_type" "fp_maddsub_d")]) + +(define_insn "*vsx_nfmsv4sf4" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") + (neg:V4SF + (fma:V4SF + (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") + (neg:V4SF + (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))] + "VECTOR_UNIT_VSX_P (V4SFmode)" + "@ + xvnmsubasp %x0,%x1,%x2 + xvnmsubmsp %x0,%x1,%x3 + xvnmsubasp %x0,%x1,%x2 + xvnmsubmsp %x0,%x1,%x3 + vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "*vsx_nfmsv2df4" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") + (neg:V2DF + (fma:V2DF + (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") + (neg:V2DF + (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "@ + xvnmsubadp %x0,%x1,%x2 + xvnmsubmdp %x0,%x1,%x3 + xvnmsubadp %x0,%x1,%x2 + xvnmsubmdp %x0,%x1,%x3" + [(set_attr "type" "vecfloat")]) ;; Vector conditional expressions (no scalar version for these instructions) (define_insn "vsx_eq<mode>" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index faa412beca0..5aa4d6b5fa4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-07-20 Michael Meissner 
<meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/ppc-fma-1.c: Adjust to allow non-VSX fmas to + be generated. + * gcc.target/powerpc/ppc-fma-2.c: Ditto. + * gcc.target/powerpc/recip-3.c: Ditto. + 2011-07-19 Jason Merrill <jason@redhat.com> PR c++/6709 (DR 743) diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c index 674115a285a..a3d532485e1 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c @@ -3,16 +3,16 @@ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math" } */ /* { dg-final { scan-assembler-times "xvmadd" 4 } } */ -/* { dg-final { scan-assembler-times "xsmadd" 2 } } */ +/* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 2 } } */ /* { dg-final { scan-assembler-times "fmadds" 2 } } */ /* { dg-final { scan-assembler-times "xvmsub" 2 } } */ -/* { dg-final { scan-assembler-times "xsmsub" 1 } } */ +/* { dg-final { scan-assembler-times "xsmsub\|fmsub\ " 1 } } */ /* { dg-final { scan-assembler-times "fmsubs" 1 } } */ /* { dg-final { scan-assembler-times "xvnmadd" 2 } } */ -/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */ +/* { dg-final { scan-assembler-times "xsnmadd\|fnmadd " 1 } } */ /* { dg-final { scan-assembler-times "fnmadds" 1 } } */ /* { dg-final { scan-assembler-times "xvnmsub" 2 } } */ -/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */ +/* { dg-final { scan-assembler-times "xsnmsub\|fnmsub " 1 } } */ /* { dg-final { scan-assembler-times "fnmsubs" 1 } } */ /* All functions should generate an appropriate (a * b) + c instruction diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c index 111b9cb098e..f732b9fa417 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c @@ -3,16 +3,16 @@ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-options "-O3 
-ftree-vectorize -mcpu=power7 -ffast-math -ffp-contract=off" } */ /* { dg-final { scan-assembler-times "xvmadd" 2 } } */ -/* { dg-final { scan-assembler-times "xsmadd" 1 } } */ +/* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 1 } } */ /* { dg-final { scan-assembler-times "fmadds" 1 } } */ /* { dg-final { scan-assembler-times "xvmsub" 2 } } */ -/* { dg-final { scan-assembler-times "xsmsub" 1 } } */ +/* { dg-final { scan-assembler-times "xsmsub\|fmsub\ " 1 } } */ /* { dg-final { scan-assembler-times "fmsubs" 1 } } */ /* { dg-final { scan-assembler-times "xvnmadd" 2 } } */ -/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */ +/* { dg-final { scan-assembler-times "xsnmadd\|fnmadd\ " 1 } } */ /* { dg-final { scan-assembler-times "fnmadds" 1 } } */ /* { dg-final { scan-assembler-times "xvnmsub" 2 } } */ -/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */ +/* { dg-final { scan-assembler-times "xsnmsub\|fnmsub\ " 1 } } */ /* { dg-final { scan-assembler-times "fnmsubs" 1 } } */ /* Only the functions calling the builtin should generate an appropriate (a * diff --git a/gcc/testsuite/gcc.target/powerpc/recip-3.c b/gcc/testsuite/gcc.target/powerpc/recip-3.c index c5ce539bb42..40658818047 100644 --- a/gcc/testsuite/gcc.target/powerpc/recip-3.c +++ b/gcc/testsuite/gcc.target/powerpc/recip-3.c @@ -1,9 +1,9 @@ /* { dg-do compile { target { { powerpc*-*-* } && { ! 
powerpc*-apple-darwin* } } } } */ /* { dg-options "-O2 -mrecip -ffast-math -mcpu=power7" } */ /* { dg-final { scan-assembler-times "xsrsqrtedp" 1 } } */ -/* { dg-final { scan-assembler-times "xsmsub.dp" 1 } } */ +/* { dg-final { scan-assembler-times "xsmsub.dp\|fmsub\ " 1 } } */ /* { dg-final { scan-assembler-times "xsmuldp" 4 } } */ -/* { dg-final { scan-assembler-times "xsnmsub.dp" 2 } } */ +/* { dg-final { scan-assembler-times "xsnmsub.dp\|fnmsub\ " 2 } } */ /* { dg-final { scan-assembler-times "frsqrtes" 1 } } */ /* { dg-final { scan-assembler-times "fmsubs" 1 } } */ /* { dg-final { scan-assembler-times "fmuls" 4 } } */ |