diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 1422 |
1 files changed, 1008 insertions, 414 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index befbfe49569..d625f586d46 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -132,7 +132,7 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8" (const (symbol_ref "ix86_cpu"))) ;; A basic instruction type. Refinements due to arguments to be @@ -142,7 +142,7 @@ alu,alu1,negnot,imov,imovx,lea, incdec,ishift,ishift1,rotate,rotate1,imul,idiv, icmp,test,ibr,setcc,icmov, - push,pop,call,callv, + push,pop,call,callv,leave, str,cld, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp, sselog,sseiadd,sseishft,sseimul, @@ -152,7 +152,7 @@ ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF" + "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF" (const_string "unknown")) ;; The CPU unit operations uses. @@ -170,7 +170,7 @@ ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) @@ -232,9 +232,24 @@ (const_int 1) (const_int 0))) +;; Set when 0f opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(and (eq_attr "mode" "DI") + (eq_attr "type" "!push,pop,call,callv,leave,ibr")) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + ] + (const_int 0))) + ;; Set when modrm byte is used. (define_attr "modrm" "" - (cond [(eq_attr "type" "str,cld") + (cond [(eq_attr "type" "str,cld,leave") (const_int 0) (eq_attr "unit" "i387") (const_int 0) @@ -273,7 +288,8 @@ (attr "length_address")))] (plus (plus (attr "modrm") (plus (attr "prefix_0f") - (const_int 1))) + (plus (attr "prefix_rex") + (const_int 1)))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -288,7 +304,7 @@ (const_string "unknown") (eq_attr "type" "lea,fcmov,fpspc,cld") (const_string "none") - (eq_attr "type" "fistp") + (eq_attr "type" "fistp,leave") (const_string "both") (eq_attr "type" "push") (if_then_else (match_operand 1 "memory_operand" "") @@ -754,7 +770,13 @@ return "ftst\;fnstsw\t%0"; } [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) ;; We may not use "#" to split and emit these, since the REG_DEAD notes ;; used to manage the reg stack popping would not be preserved. @@ -857,7 +879,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) (define_insn "*cmpfp_2u_1" [(set (match_operand:HI 0 "register_operand" "=a") @@ -871,7 +899,13 @@ && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 2, 1);" [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) ;; Patterns to match the SImode-in-memory ficom instructions. ;; @@ -911,7 +945,7 @@ ;; FP compares, step 2 ;; Move the fpsw to ax. -(define_insn "x86_fnstsw_1" +(define_insn "*x86_fnstsw_1" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(reg 18)] UNSPEC_FNSTSW))] "TARGET_80387" @@ -946,7 +980,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_i_sse" @@ -958,7 +998,10 @@ && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp,ssecmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_i_sse_only" @@ -969,7 +1012,10 @@ && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "ssecmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu" @@ -982,7 +1028,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu_sse" @@ -994,7 +1046,10 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "fcmp,ssecmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu_sse_only" @@ -1005,7 +1060,10 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "ssecmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) ;; Move instructions. @@ -2011,22 +2069,11 @@ { switch (which_alternative) { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (4); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - case 1: return "push{l}\t%1"; - case 2: - return "#"; default: + /* This insn should be already splitted before reg-stack. */ abort (); } } @@ -2040,23 +2087,11 @@ { switch (which_alternative) { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - case 1: return "push{q}\t%q1"; - case 2: - return "#"; - default: + /* This insn should be already splitted before reg-stack. */ abort (); } } @@ -2104,7 +2139,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2130,12 +2170,12 @@ case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2 && !TARGET_ATHLON) + if (get_attr_mode (insn) == MODE_TI) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) + if (get_attr_mode (insn) == MODE_V4SF) return "movaps\t{%1, %0|%0, %1}"; else return "movss\t{%1, %0|%0, %1}"; @@ -2155,7 +2195,40 @@ } } [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") - (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_insn "*swapsf" [(set (match_operand:SF 0 "register_operand" "+f") @@ -2188,26 +2261,8 @@ (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))] "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - case 3: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "DF,SI,SI,DF")]) @@ -2217,32 +2272,8 @@ (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))] "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "DF,SI,DF")]) @@ -2279,7 +2310,7 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) + && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2290,7 +2321,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2316,31 +2352,84 @@ case 4: return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + return "movsd\t{%1, %0|%0, %1}"; default: abort(); } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && !optimize_size && TARGET_INTEGER_DFMODE_MOVES + && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2351,7 +2440,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2378,16 +2472,34 @@ return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: return "movsd\t{%1, %0|%0, %1}"; @@ -2396,7 +2508,42 @@ } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -2451,25 +2598,8 @@ (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] "!TARGET_64BIT && optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI,SI")]) @@ -2479,25 +2609,8 @@ (match_operand:TF 1 "general_no_elim_operand" "f,Fo,*r"))] "optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI,SI")]) @@ -2507,24 +2620,8 @@ (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] "!TARGET_64BIT && !optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI")]) @@ -2534,30 +2631,8 @@ (match_operand:TF 1 "general_no_elim_operand" "f#r,rFo#f"))] "!optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } + /* This insn should be already splitted before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI")]) @@ -2610,7 +2685,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2657,7 +2737,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2704,7 +2789,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2751,7 +2841,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -3699,11 +3794,11 @@ (set_attr "mode" "SF,SF,SF,SF")]) (define_insn "*truncdfsf2_1_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,Y") + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m#fxr,?f#xr,?r#fx,?x#fr,Y#fr") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) + (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] - "TARGET_80387 && TARGET_SSE2" + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" { switch (which_alternative) { @@ -3713,7 +3808,30 @@ else return "fst%z0\t%y0"; case 4: - return "cvtsd2ss\t{%1, %0|%0, %1}"; + return "#"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") + (set_attr "mode" "SF,SF,SF,SF,DF")]) + +(define_insn "*truncdfsf2_1_sse_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 4: + return "#"; default: abort (); } @@ -3724,8 +3842,8 @@ (define_insn "*truncdfsf2_2" [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] - "TARGET_80387 && TARGET_SSE2 + (match_operand:DF 1 "nonimmediate_operand" "mY,f#Y")))] + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) @@ -3744,7 +3862,30 @@ [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) -(define_insn "truncdfsf2_3" +(define_insn "*truncdfsf2_2_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return "#"; + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "ssecvt,fmov") + (set_attr "mode" "DF,SF")]) + +(define_insn "*truncdfsf2_3" [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] @@ -3762,11 +3903,20 @@ [(set (match_operand:SF 0 "register_operand" "=Y") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2" + "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" "cvtsd2ss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "*truncdfsf2_sse_only_nooverlap" + [(set (match_operand:SF 0 "register_operand" "=&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" + "#" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + (define_split [(set (match_operand:SF 0 "memory_operand" "") (float_truncate:SF @@ -3776,15 +3926,56 @@ [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") +; Avoid possible reformating penalty on the destination by first +; zeroing it out (define_split - [(set (match_operand:SF 0 "nonimmediate_operand" "") + [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" ""))) (clobber (match_operand 2 "" ""))] "TARGET_80387 && reload_completed - && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") + && SSE_REG_P (operands[0]) + && !STACK_REG_P (operands[1])" + [(const_int 0)] +{ + rtx src, dest; + if (!TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS) + emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); + else + { + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + } + DONE; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed + && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" + [(const_int 0)] +{ + rtx src, dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + DONE; +}) (define_split [(set (match_operand:SF 0 "register_operand" "") @@ -4468,7 +4659,7 @@ "") (define_insn "*floatsisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") + [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f") (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" "@ @@ -4488,6 +4679,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ss (dest, dest, operands[1])); + DONE; +}) + (define_expand "floatdisf2" [(set (match_operand:SF 0 "register_operand" "") (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] @@ -4506,7 +4713,7 @@ (set_attr "fp_int_src" "true")]) (define_insn "*floatdisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") + [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f") (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" "@ @@ -4526,6 +4733,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ssq (dest, dest, operands[1])); + DONE; +}) + (define_insn "floathidf2" [(set (match_operand:DF 0 "register_operand" "=f,f") (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] @@ -4544,7 +4767,7 @@ "") (define_insn "*floatsidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") + [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f") (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" "@ @@ -4582,7 +4805,7 @@ (set_attr "fp_int_src" "true")]) (define_insn "*floatdidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") + [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f") (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" "@ @@ -9269,12 +9492,15 @@ in register. */ rtx reg = gen_reg_rtx (SFmode); rtx dest = operands[0]; + rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9293,7 +9519,7 @@ (define_insn "negsf2_ifs" [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9314,7 +9540,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9324,13 +9550,15 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (xor:TI (subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + (xor:TI (match_dup 1) + (match_dup 2)))] { + operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9403,7 +9631,7 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg; #if HOST_BITS_PER_WIDE_INT >= 64 rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else @@ -9413,7 +9641,10 @@ operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9432,7 +9663,7 @@ (define_insn "negdf2_ifs" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9442,8 +9673,8 @@ (define_insn "*negdf2_ifs_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9454,7 +9685,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (neg:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9464,7 +9695,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0]) && (!TARGET_64BIT || FP_REG_P (operands[0]))" @@ -9475,7 +9706,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9488,13 +9719,19 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (xor:TI (subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + (xor:TI (match_dup 1) + (match_dup 2)))] { + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9727,14 +9964,18 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (SFmode); + rtx reg = gen_reg_rtx (V4SFmode); rtx dest = operands[0]; + rtx imm; operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode)); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9751,20 +9992,20 @@ "#") (define_insn "abssf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" + && register_operand (operands[1], VOIDmode)))" "#") (define_split [(set (match_operand:SF 0 "memory_operand" "") (abs:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9774,7 +10015,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9784,12 +10025,23 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (match_dup 1) + (match_dup 2)))] +{ + operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9852,17 +10104,22 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg = gen_reg_rtx (V2DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); + rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode); #else - rtx imm = immed_double_const (0, 0x80000000, DImode); + rtx imm = immed_double_const (~0, ~0x80000000, DImode); #endif rtx dest = operands[0]; operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + + /* Produce LONG_DOUBLE with the proper immediate argument. */ + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9879,9 +10136,9 @@ "#") (define_insn "absdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9890,9 +10147,9 @@ "#") (define_insn "*absdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9903,7 +10160,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (abs:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9913,7 +10170,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9923,12 +10180,27 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (match_dup 1) + (match_dup 2)))] +{ + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems @@ -13367,7 +13639,7 @@ (match_operand:SI 3 "" "")))])] "!TARGET_64BIT" { - ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0); DONE; }) @@ -13412,7 +13684,17 @@ (use (match_operand 2 "" ""))] "" { - ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; +}) + +(define_expand "sibcall" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); DONE; }) @@ -13431,41 +13713,51 @@ (define_insn "*call_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "!TARGET_64BIT" + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" { if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], QImode)) + return "jmp\t%P0"; + return "jmp\t%A0"; } [(set_attr "type" "call")]) (define_insn "*call_1_rex64" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT" { if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + return "call\t%P0"; + return "call\t%A0"; } [(set_attr "type" "call")]) +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P0" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64_v" + [(call (mem:QI (reg:DI 40)) + (match_operand 0 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "call")]) + + ;; Call subroutine, returning value in operand 0 (define_expand "call_value_pop" @@ -13478,7 +13770,7 @@ "!TARGET_64BIT" { ix86_expand_call (operands[0], operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], 0); DONE; }) @@ -13490,7 +13782,19 @@ ;; Operand 2 not used on the i386. "" { - ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL); + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0); + DONE; +}) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1); DONE; }) @@ -13513,7 +13817,7 @@ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1), - NULL); + NULL, 0); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -13662,11 +13966,7 @@ (clobber (mem:BLK (scratch)))] "!TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_insn "leave_rex64" [(set (reg:DI 7) (plus:DI (reg:DI 6) (const_int 8))) @@ -13674,11 +13974,7 @@ (clobber (mem:BLK (scratch)))] "TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") @@ -14312,6 +14608,24 @@ (const_string "fop"))) (set_attr "mode" "SF")]) +(define_insn "*fop_df_6" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + (define_insn "*fop_xf_1" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" @@ -14421,7 +14735,7 @@ (define_insn "*fop_xf_4" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + [(float_extend:XF (match_operand 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] "!TARGET_64BIT && TARGET_80387" "* return output_387_binary_op (insn, operands);" @@ -14437,7 +14751,7 @@ (define_insn "*fop_tf_4" [(set (match_operand:TF 0 "register_operand" "=f,f") (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + [(float_extend:TF (match_operand 1 "nonimmediate_operand" "fm,0")) (match_operand:TF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" @@ -14455,7 +14769,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] "!TARGET_64BIT && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14472,7 +14786,7 @@ (match_operator:TF 3 "binary_fp_operator" [(match_operand:TF 1 "register_operand" "0,f") (float_extend:TF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14487,41 +14801,10 @@ (define_insn "*fop_xf_6" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_tf_6" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:TF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_xf_7" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") + [(float_extend:XF + (match_operand 1 "register_operand" "0,f")) (float_extend:XF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] "!TARGET_64BIT && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14531,14 +14814,15 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "DF")]) + (set_attr "mode" "SF")]) -(define_insn "*fop_tf_7" +(define_insn "*fop_tf_6" [(set (match_operand:TF 0 "register_operand" "=f,f") (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") + [(float_extend:TF + (match_operand 1 "register_operand" "0,f")) (float_extend:TF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14548,7 +14832,7 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "DF")]) + (set_attr "mode" "SF")]) (define_split [(set (match_operand 0 "register_operand" "") @@ -15949,9 +16233,9 @@ (define_expand "movhicc" [(set (match_operand:HI 0 "register_operand" "") (if_then_else:HI (match_operand 1 "comparison_operator" "") - (match_operand:HI 2 "nonimmediate_operand" "") - (match_operand:HI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE && TARGET_HIMODE_MATH" + (match_operand:HI 2 "general_operand" "") + (match_operand:HI 3 "general_operand" "")))] + "TARGET_HIMODE_MATH" "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") (define_insn "*movhicc_noc" @@ -15968,6 +16252,33 @@ [(set_attr "type" "icmov") (set_attr "mode" "HI")]) +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "general_operand" "") + (match_operand:QI 3 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + (define_expand "movsfcc" [(set (match_operand:SF 0 "register_operand" "") (if_then_else:SF (match_operand 1 "comparison_operator" "") @@ -16035,7 +16346,7 @@ (define_split [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand 4 "" "") (const_int 0)]) + [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] "!TARGET_64BIT && reload_completed" @@ -16549,6 +16860,12 @@ (clobber (reg:CC 17))] "TARGET_SSE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16576,6 +16893,12 @@ (clobber (reg:CC 17))] "TARGET_SSE2 && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16637,6 +16960,14 @@ (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) (subreg:TI (match_dup 7) 0)))] { + if (GET_MODE (operands[2]) == DFmode + && TARGET_SSE_PARTIAL_REGS && !optimize_size) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } /* If op2 == op3, op3 will be clobbered before it is used. This should be optimized out though. */ if (operands_match_p (operands[2], operands[3])) @@ -16743,8 +17074,22 @@ || const0_operand (operands[3], GET_MODE (operands[0])))" [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)])) (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) - (subreg:TI (match_dup 7) 0)))] + (match_dup 7)))] { + if (TARGET_SSE_PARTIAL_REGS && !optimize_size + && GET_MODE (operands[2]) == DFmode) + { + if (REG_P (operands[2])) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + if (REG_P (operands[3])) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + } PUT_MODE (operands[1], GET_MODE (operands[0])); if (!sse_comparison_operator (operands[1], VOIDmode)) { @@ -16764,6 +17109,8 @@ operands[7] = operands[2]; operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0); } + operands[7] = simplify_gen_subreg (TImode, operands[7], + GET_MODE (operands[7]), 0); }) (define_expand "allocate_stack_worker" @@ -17759,19 +18106,23 @@ [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) (match_operand:SI 2 "" "")))] - "!TARGET_64BIT" + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" { if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%*%1"; - else - return "call\t%*%1"; + return "call\t%P1"; + return "call\t%*%1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a")) + (match_operand:SI 2 "" "")))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], QImode)) + return "jmp\t%P1"; + return "jmp\t%*%1"; } [(set_attr "type" "callv")]) @@ -17779,21 +18130,29 @@ [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" "")))] - "TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT" { if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A1"; - else - return "call\t%A1"; + return "call\t%P1"; + return "call\t%A1"; } [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "" "")))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P1" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64_v" + [(set (match_operand 0 "" "") + (call (mem:QI (reg:DI 40)) + (match_operand:DI 1 "" "")))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "callv")]) (define_insn "trap" [(trap_if (const_int 1) (const_int 5))] @@ -17838,7 +18197,7 @@ { operands[2] = gen_label_rtx (); output_asm_insn ("j%c0\t%l2\; int\t%1", operands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (operands[2])); RET; }) @@ -17851,28 +18210,93 @@ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + operands[2] = CONST0_RTX (V4SFmode); +}) + (define_insn "movv4si_internal" [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv2di_internal" [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + operands[2] = CONST0_RTX (V2DFmode); +}) (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") @@ -17922,28 +18346,85 @@ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movapd\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "V2DF")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "V2DF"))] + (const_string "V2DF")))]) (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_expand "movv2df" [(set (match_operand:V2DF 0 "general_operand" "") @@ -18160,26 +18641,83 @@ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") (match_operand:TI 1 "general_operand" "C,xm,x"))] "TARGET_SSE && !TARGET_64BIT" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - # - # - xorps\t%0, %0 - movaps\\t{%1, %0|%0, %1} - movaps\\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18329,11 +18867,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18856,12 +19404,26 @@ ;; this insn. (define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] + (match_operand:V4SF 1 "const0_operand" "X"))] "TARGET_SSE" - "xorps\t{%0, %0|%0, %0}" +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t{%0, %0|%0, %0}"; + else + return "xorps\t{%0, %0|%0, %0}"; +} [(set_attr "type" "sselog") (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")))]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. @@ -19093,6 +19655,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19883,7 +20457,7 @@ output_asm_insn (\"rex\", operands); output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); } - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (operands[3])); RET; } @@ -20720,7 +21294,7 @@ (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "register_operand" "xm"))) + (match_operand:V2DF 2 "nonimmediate_operand" "xm"))) (const_int 14)))] "TARGET_SSE2" "cvtsd2ss\t{%2, %0|%0, %2}" @@ -20732,7 +21306,7 @@ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "register_operand" "xm") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))) (const_int 2)))] @@ -21008,10 +21582,20 @@ (define_insn "sse2_clrti" [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] "TARGET_SSE2" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "sseiadd") +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; +} + [(set_attr "type" "ssemov") (set_attr "memory" "none") - (set_attr "mode" "TI")]) + (set (attr "mode") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")))]) ;; MMX unsigned averages/sum of absolute differences @@ -21716,11 +22300,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" |