diff options
Diffstat (limited to 'gcc/config/rs6000/rs6000.md')
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 479 |
1 files changed, 425 insertions, 54 deletions
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 1e65ac1cde0..010e21f7413 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -127,6 +127,13 @@ UNSPEC_LFIWZX UNSPEC_FCTIWUZ UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_FUSION_GPR ]) ;; @@ -146,7 +153,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. -(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md @@ -166,9 +173,14 @@ (const_int 4))) ;; Processor type -- this attribute must exactly match the processor_type -;; 
enumeration in rs6000.h. - -(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan" +;; enumeration in rs6000-opts.h. +(define_attr "cpu" + "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630, + ppc750,ppc7400,ppc7450, + ppc403,ppc405,ppc440,ppc476, + ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500, + power4,power5,power6,power7,power8, + rs64a,mpccore,cell,ppca2,titan" (const (symbol_ref "rs6000_cpu_attr"))) @@ -201,6 +213,7 @@ (include "power5.md") (include "power6.md") (include "power7.md") +(include "power8.md") (include "cell.md") (include "xfpu.md") (include "a2.md") @@ -227,6 +240,12 @@ ; extend modes for DImode (define_mode_iterator QHSI [QI HI SI]) +; QImode or HImode for small atomic ops +(define_mode_iterator QHI [QI HI]) + +; HImode or SImode for sign extended fusion ops +(define_mode_iterator HSI [HI SI]) + ; SImode or DImode, even if DImode doesn't fit in GPRs. 
(define_mode_iterator SDI [SI DI]) @@ -268,6 +287,15 @@ (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "")]) + ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") (DF "") @@ -284,11 +312,16 @@ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) +(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) + +; Definitions for 32-bit fpr direct move +(define_mode_attr f32_dm [(SF "wn") (SD "wm")]) ; These modes do not fit in integer registers in 32-bit mode. 
; but on e500v2, the gpr are 64 bit registers @@ -368,7 +401,7 @@ (define_insn "*zero_extend<mode>di2_internal1" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)" "@ l<wd>z%U1%X1 %0,%1 rldicl %0,%1,0,<dbits>" @@ -434,6 +467,29 @@ (const_int 0)))] "") +(define_insn "*zero_extendsidi2_lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWZX" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + mtvsrwz %x0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_u") + (const_string "load"))) + (const_string "*") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + (define_insn "extendqidi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -581,10 +637,33 @@ "TARGET_POWERPC64" "") -(define_insn "" +(define_insn "*extendsidi2_lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWAX" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + mtvsrwa %x0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_u") + (const_string "load_ext"))) + (const_string "exts") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + 
+(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] - "TARGET_POWERPC64 && rs6000_gen_cell_microcode" + "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 extsw %0,%1" @@ -598,7 +677,7 @@ (const_string "load_ext"))) (const_string "exts")])]) -(define_insn "" +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64 && !rs6000_gen_cell_microcode" @@ -2035,7 +2114,9 @@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] "TARGET_CMPB && TARGET_POPCNTB" - "prty<wd> %0,%1") + "prty<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "parity<mode>2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -4316,7 +4397,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -4348,7 +4429,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -5104,6 +5185,41 @@ "frsqrtes %0,%1" [(set_attr "type" "fp")]) +;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in +;; builtins.c and optabs.c that are not correct for IBM long double +;; when little-endian. 
+(define_expand "signbittf2" + [(set (match_dup 2) + (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (set (match_dup 3) + (subreg:DI (match_dup 2) 0)) + (set (match_dup 4) + (match_dup 5)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (match_dup 6))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64) + { + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63)); + operands[6] = gen_rtx_SUBREG (SImode, operands[4], + WORDS_BIG_ENDIAN ? 4 : 0); + } + else + { + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_rtx_SUBREG (SImode, operands[3], + WORDS_BIG_ENDIAN ? 0 : 4); + operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31)); + } +}) + (define_expand "copysign<mode>3" [(set (match_dup 3) (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) @@ -5553,12 +5669,15 @@ ; We don't define lfiwax/lfiwzx with the normal definition, because we ; don't want to support putting SImode in FPR registers. (define_insn "lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWAX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" - "lfiwax %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1" + [(set_attr "type" "fpload,fpload,mffgpr")]) ; This split must be run before register allocation because it allocates the ; memory slot that is needed to move values to/from the FPR. 
We don't allocate @@ -5580,7 +5699,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, false); else { @@ -5629,12 +5749,15 @@ (set_attr "type" "fpload")]) (define_insn "lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWZX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" - "lfiwzx %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1" + [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") @@ -5651,7 +5774,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, true); else { @@ -5942,7 +6066,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -6036,7 +6160,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -8285,6 +8409,18 @@ (compare:CC (match_dup 0) (const_int 0)))] "") + +;; Eqv operation. 
+(define_insn "*eqv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR + (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] + "" + "eqv %0,%1,%2" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + ;; Now define ways of moving data around. @@ -8490,7 +8626,7 @@ cmp<wd>i %2,%0,0 mr. %0,%1 #" - [(set_attr "type" "cmp,compare,cmp") + [(set_attr "type" "cmp,fast_compare,cmp") (set_attr "length" "4,4,8")]) (define_split @@ -8680,8 +8816,8 @@ }") (define_insn "mov<mode>_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -8694,6 +8830,10 @@ xxlxor %x0,%x0,%x0 <f32_li> <f32_si> + <f32_lv> + <f32_sv> + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 mt%0 %1 mf%1 %0 nop @@ -8732,16 +8872,20 @@ (match_test "update_address_mem (operands[0], VOIDmode)") (const_string "fpstore_u") (const_string "fpstore"))) + (const_string "fpload") + (const_string "fpstore") + (const_string "mftgpr") + (const_string "mffgpr") (const_string "mtjmpr") (const_string "mfjmpr") (const_string "*") (const_string "*") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")]) (define_insn "*mov<mode>_softfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") - (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))] + (match_operand:FMOVE32 1 "input_operand" 
"r,r,h,m,r,I,L,G,Fn,0"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" @@ -8954,8 +9098,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov<mode>_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -8980,7 +9124,9 @@ # # mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9038,8 +9184,10 @@ (const_string "*") (const_string "*") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov<mode>_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") @@ -9154,8 +9302,8 @@ "&& reload_completed" [(pc)] { - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word), operands[1]); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word), @@ -9384,8 +9532,8 @@ && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -9419,6 +9567,216 @@ }) +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is set up for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependency between the two, but we could potentially +;; schedule other instructions between the two instructions. TFmode is +;; currently limited to traditional FPR registers. If/when this is changed, we +;; will need to revisit %L to make sure it works with VSX registers, or add an +;; %x version of %L.
+ +(define_insn "p8_fmrgow_<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%L1" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_mtvsrwz_1" + [(set (match_operand:TF 0 "register_operand" "=d") + (unspec:TF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrwz_2" + [(set (match_operand:TF 0 "register_operand" "+d") + (unspec:TF [(match_dup 0) + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=ws") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_<mode> (dest, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_1" + [(set (match_operand:TF 0 "register_operand" "=ws") + (unspec:TF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrd_2" + [(set (match_operand:TF 0 "register_operand" "+ws") + (unspec:TF 
[(match_dup 0)
+		    (match_operand:DI 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRD))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrd %L0,%1"
+  [(set_attr "type" "mftgpr")])
+
+;; Build the 128-bit VSX result from the TF temporary: operand 1 is the
+;; register pair written by p8_mtvsrd_1 (%1) and p8_mtvsrd_2 (%L1).
+(define_insn "p8_xxpermdi_<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+	(unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+			     UNSPEC_P8V_XXPERMDI))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "xxpermdi %x0,%1,%L1,0"
+  [(set_attr "type" "vecperm")])
+
+;; Reload helper that moves a 128-bit value from GPRs (operand 1) to a VSX
+;; register (operand 0) using the TF scratch in operand 2.  After reload it
+;; is split into two mtvsrd insns, one for each DImode half of the source,
+;; followed by p8_xxpermdi_<mode> to form the destination.
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+	(unspec:FMOVE128_GPR
+	 [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+	 UNSPEC_P8V_RELOAD_FROM_GPR))
+   (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx gpr_hi_reg = gen_highpart (DImode, src);
+  rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+  emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+  emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+  emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+  /* The replacement template above is only a placeholder; DONE makes the
+     insns emitted here the complete result of the split.  */
+  DONE;
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move SFmode to a VSX from a GPR register.  Because scalar floating point
+;; type is stored internally as double precision in the VSX registers, we have
+;; to convert it from the vector format.
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+  [(set (match_operand:SF 0 "register_operand" "=wa")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+		   UNSPEC_P8V_RELOAD_FROM_GPR))
+   (clobber (match_operand:DI 2 "register_operand" "=r"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+  rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  /* Direct move of the shifted DImode value into the VSX destination.  */
+  emit_move_insn (op0_di, op2);
+  /* Convert in place from vector single format to the scalar format.  */
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  DONE;
+}
+  ;; Three insns are emitted by the split (shift, direct move, convert), so
+  ;; the length and type must describe three insns, as the other three-insn
+  ;; reload splitters do.
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
+;; and then doing a move of that.
+(define_insn "p8_mfvsrd_3_<mode>"
+  [(set (match_operand:DF 0 "register_operand" "=r")
+	(unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+		   UNSPEC_P8V_RELOAD_FROM_VSX))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "mfvsrd %0,%x1"
+  [(set_attr "type" "mftgpr")])
+
+;; Reload helper that moves a 128-bit value from a VSX register (operand 1)
+;; to GPRs (operand 0) using the VSX scratch in operand 2.  After reload it
+;; is split into: mfvsrd of the source into the high DFmode half of the
+;; destination, an xxpermdi (selector 3) of the source into the scratch, and
+;; mfvsrd of the scratch into the low DFmode half of the destination.
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+	(unspec:FMOVE128_GPR
+	 [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+	 UNSPEC_P8V_RELOAD_FROM_VSX))
+   (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+  rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+  emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+  emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+  emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+  /* The replacement template above is only a placeholder; DONE makes the
+     insns emitted here the complete result of the split.  */
+  DONE;
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register.  Because scalar floating point
+;; type is stored internally as double precision, we have to convert it to the
+;; vector format.
+ +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Next come the multi-word integer load and store and the load and store ;; multiple insns. @@ -9467,7 +9825,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "") (match_operand:DI 1 "const_int_operand" ""))] "! 
TARGET_POWERPC64 && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 1))] " @@ -9485,13 +9844,14 @@ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") (match_operand:DIFD 1 "input_operand" ""))] "reload_completed && !TARGET_POWERPC64 - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -9513,7 +9873,9 @@ nop xxlxor %x0,%x0,%x0 mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9562,8 +9924,10 @@ (const_string "*") (const_string "vecsimple") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
@@ -9652,19 +10016,23 @@ (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r"))] - "(TARGET_POWERPC64 - && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode)) + [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") + (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" - "#" - [(set_attr "type" "store,load,*")]) +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "store,load,*,*") + (set_attr "length" "8")]) (define_split - [(set (match_operand:TI2 0 "gpc_reg_operand" "") + [(set (match_operand:TI2 0 "int_reg_operand" "") (match_operand:TI2 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (<MODE>mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9691,7 +10059,9 @@ [(set (match_operand:TI2 0 "nonimmediate_operand" "") (match_operand:TI2 1 "input_operand" ""))] "reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12554,8 +12924,8 @@ (match_dup 13)] { REAL_VALUE_TYPE rv; - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word); @@ -14788,7 +15158,7 @@ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] "TARGET_POPCNTD" "bpermd %0,%1,%2" - [(set_attr "type" "integer")]) + [(set_attr "type" "popcnt")]) ;; Builtin fma support. Handle @@ -14931,3 +15301,4 @@ (include "spe.md") (include "dfp.md") (include "paired.md") +(include "crypto.md") |