summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2000-04-17 21:39:30 +0000
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2000-04-17 21:39:30 +0000
commitd8fbfe3435cf181049488c0bf3677e98a2022e51 (patch)
treef968a9f00f97ceb6019eb8ab06dc60d7581752dc /gcc
parent124d766df38802d862e5580e8395953c6c19ec80 (diff)
downloadgcc-d8fbfe3435cf181049488c0bf3677e98a2022e51.tar.gz
* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
(x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall): New global variables. (ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only allowed; fix load penalties for Athlon. * i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall): Declare. (TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY, TARGET_MEMORY_MISMATCH_STALL): New. * i386.md (athlon scheduling parameters): Fix latencies according to Athlon Optimization Manual. (sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to vector. (fsqrt instruction patterns): Set athlon_decode to direct. (movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines. (movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY and TARGET_PARTIAL_REGISTER_STALL machines. (pushdf_nointeger): New pattern. (pushdf_integer): Rename from pushdf. (movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines. (movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@33215 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog25
-rw-r--r--gcc/config/i386/i386.c38
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/i386.md233
4 files changed, 236 insertions, 66 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9fcbff80587..8d11d84a3c3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+Mon Apr 17 23:35:29 MET DST 2000 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
+ (x86_integer_DFmode_moves, x86_partial_reg_dependency,
+ x86_memory_mismatch_stall): New global variables.
+ (ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
+ allowed; fix load penalties for Athlon.
+ * i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
+ x86_memory_mismatch_stall): Declare.
+ (TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
+ TARGET_MEMORY_MISMATCH_STALL): New.
+ * i386.md (athlon scheduling parameters): Fix latencies according to
+ Athlon Optimization Manual.
+ (sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
+ vector.
+ (fsqrt instruction patterns): Set athlon_decode to direct.
+ (movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
+ PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
+ (movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
+ and TARGET_PARTIAL_REGISTER_STALL machines.
+ (pushdf_nointeger): New pattern.
+ (pushdf_integer): Rename from pushdf.
+ (movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
+ (movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.
+
2000-04-17 Richard Henderson <rth@cygnus.com>
* loop.c (canonicalize_condition): Add WANT_REG argument.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 516c2744934..b2e81a47e2a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -163,12 +163,12 @@ struct processor_costs k6_cost = {
struct processor_costs athlon_cost = {
1, /* cost of an add instruction */
- 1, /* cost of a lea instruction */
+ 2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
5, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 19, /* cost of a divide/mod */
+ 42, /* cost of a divide/mod */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
@@ -177,9 +177,9 @@ struct processor_costs athlon_cost = {
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
+ {6, 6, 20}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 4} /* cost of loading integer registers */
+ {4, 4, 16} /* cost of loading integer registers */
};
struct processor_costs *ix86_cost = &pentium_cost;
@@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
+const int x86_integer_DFmode_moves = ~m_ATHLON;
+const int x86_partial_reg_dependency = m_ATHLON;
+const int x86_memory_mismatch_stall = m_ATHLON;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
@@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
int cost;
{
enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
rtx set, set2;
int dep_insn_code_number;
@@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV
- && get_attr_memory (dep_insn) == MEMORY_LOAD)
+ && (((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD)
+ || memory == MEMORY_BOTH))
cost += 1;
/* INT->FP conversion is expensive. */
@@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
/* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */
- if (get_attr_memory (dep_insn) == MEMORY_LOAD)
+ if (((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD)
+ || memory == MEMORY_BOTH)
cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
/* INT->FP conversion is expensive. */
@@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
break;
case PROCESSOR_ATHLON:
- /* Address Generation Interlock cause problems on the Athlon CPU because
- the loads and stores are done in order so once one load or store has
- to wait, others must too, so penalize the AGIs slightly by one cycle.
- We might experiment with this value later. */
- if (ix86_agi_dependant (insn, dep_insn, insn_type))
- cost += 1;
+ if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
+ || memory == MEMORY_BOTH)
+ {
+ if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
+ cost += 2;
+ else
+ cost += 3;
+ }
- /* Since we can't represent delayed latencies of load+operation,
- increase the cost here for non-imov insns. */
- if (dep_insn_type != TYPE_IMOV
- && dep_insn_type != TYPE_FMOV
- && get_attr_memory (dep_insn) == MEMORY_LOAD)
- cost += 2;
default:
break;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 50f08251420..8ae7be8d444 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write;
extern const int x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
-extern const int x86_promote_hi_regs;
+extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
+extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
@@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
+#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
+#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 090d0ebd444..8b9b2530e78 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -738,7 +738,7 @@
;; communicates with all the execution units separately instead.
(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str")
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
(const_string "vector")
(and (eq_attr "type" "push")
(match_operand 1 "memory_operand" ""))
@@ -766,7 +766,7 @@
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld"))
+ (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push")
1 1)
(define_function_unit "athlon_ieu" 3 0
@@ -777,12 +777,12 @@
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul"))
- 4 0)
+ 5 0)
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv"))
- 27 0)
+ 42 0)
(define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon")
@@ -792,56 +792,118 @@
(define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv"))
- 27 27)
+ 42 42)
-(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all"
+(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
(cond [(eq_attr "type" "fop,fop1,fcmp")
(const_string "add")
- (eq_attr "type" "fmul,fdiv,fpspc,fsgn")
+ (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
(const_string "mul")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "!none"))
+ (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
(const_string "store")
+ (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
+ (const_string "any")
(and (eq_attr "type" "fmov")
(ior (match_operand:SI 1 "register_operand" "")
(match_operand 1 "immediate_operand" "")))
(const_string "store")
(eq_attr "type" "fmov")
- (const_string "muladd")
- (eq_attr "type" "fcmov")
- (const_string "all")]
+ (const_string "muladd")]
(const_string "none")))
-(define_function_unit "athlon_fp_mul" 1 0
+;; We use latencies 1 for definitions. This is OK to model collisions
+;; in execution units. The real latencies are modeled in the "fp" pipeline.
+
+;; fsin, fcos: 96-192
+;; fsincos: 107-211
+;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
+(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "mul,all"))
- 4 1)
+ (eq_attr "type" "fpspc"))
+ 100 1)
-(define_function_unit "athlon_fp_add" 1 0
+;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
+(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "add,all"))
+ (eq_attr "type" "fdiv"))
+ 24 1)
+
+(define_function_unit "athlon_fp" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fop,fop1,fmul"))
4 1)
-(define_function_unit "athlon_fp_muladd" 2 0
+;; XFmode loads are slow.
+;; XFmode store is slow too (8 cycles), but we don't need to model it, because
+;; there are no dependent instructions.
+
+(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov")
- (eq_attr "athlon_fpunits" "muladd,mul,add,all")))
+ (match_operand:XF 1 "memory_operand" "")))
+ 10 1)
+
+(define_function_unit "athlon_fp" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fmov,fsgn"))
2 1)
+;; fcmp and ftst instructions
+(define_function_unit "athlon_fp" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "athlon_decode" "direct")))
+ 3 1)
+
+;; fcmpi instructions.
+(define_function_unit "athlon_fp" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "athlon_decode" "vector")))
+ 3 1)
+
+(define_function_unit "athlon_fp" 3 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ 7 1)
+
+(define_function_unit "athlon_fp_mul" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_fpunits" "mul"))
+ 1 1)
+
+(define_function_unit "athlon_fp_add" 1 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "athlon_fpunits" "add"))
+ 1 1)
+
(define_function_unit "athlon_fp_muladd" 2 0
(and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "!fmov")
- (eq_attr "athlon_fpunits" "muladd,mul,add,all")))
- 4 1)
+ (eq_attr "athlon_fpunits" "muladd,mul,add"))
+ 1 1)
(define_function_unit "athlon_fp_store" 1 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "store,all"))
+ (eq_attr "athlon_fpunits" "store"))
1 1)
-(define_function_unit "athlon_agu" 3 0
+;; We don't need to model the Address Generation Unit, since we don't model
+;; the re-order buffer yet and thus we never schedule more than three operations
+;; at a time. Later we may want to experiment with MD_SCHED macros modeling the
+;; decoders independently on the functional units.
+
+;(define_function_unit "athlon_agu" 3 0
+; (and (eq_attr "cpu" "athlon")
+; (and (eq_attr "memory" "!none")
+; (eq_attr "athlon_fpunits" "none")))
+; 1 1)
+
+;; Model load unit to avoid too long sequences of loads. We don't need to
+;; model store queue, since it is hardly going to be bottleneck.
+
+(define_function_unit "athlon_load" 2 0
(and (eq_attr "cpu" "athlon")
- (and (eq_attr "memory" "!none")
- (eq_attr "athlon_fpunits" "none")))
+ (eq_attr "memory" "load,both"))
1 1)
@@ -1255,6 +1317,7 @@
""
"sahf"
[(set_attr "length" "1")
+ (set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "one")])
;; Pentium Pro can do steps 1 through 3 in one go.
@@ -1390,6 +1453,7 @@
"xchg{l}\\t%1, %0"
[(set_attr "type" "imov")
(set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
(define_expand "movhi"
@@ -1437,8 +1501,10 @@
}"
[(set (attr "type")
(cond [(and (eq_attr "alternative" "0")
- (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
- (const_int 0)))
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
(const_string "imov")
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
@@ -1456,8 +1522,10 @@
(match_operand:HI 1 "aligned_operand" ""))
(const_string "0")
(and (eq_attr "alternative" "0")
- (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
- (const_int 0)))
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
(const_string "0")
]
(const_string "1")))
@@ -1547,9 +1615,19 @@
[(set_attr "type" "pop")
(set_attr "length_prefix" "1")])
+;; Situation is quite tricky about when to choose full sized (SImode) move
+;; over QImode moves. For Q_REG -> Q_REG move we use full size only for
+;; partial register dependency machines (such as AMD Athlon), where QImode
+;; moves issue extra dependency and for partial register stalls machines
+;; that don't use QImode patterns (and QImode move cause stall on the next
+;; instruction).
+;;
+;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
+;; register stall machines with, where we use QImode instructions, since
+;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q,r,?r,m")
- (match_operand:QI 1 "general_operand" "qn,qm,rn,qm,qn"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+ (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
"*
{
@@ -1560,26 +1638,50 @@
abort ();
return \"movz{bl|x}\\t{%1, %k0|%k0, %1}\";
default:
- if (which_alternative == 2)
+ if (which_alternative == 4 || which_alternative == 3
+ || (which_alternative == 1 && get_attr_length (insn) == 5)
+ || (which_alternative == 0
+ && ((TARGET_PARTIAL_REG_STALL && !TARGET_QIMODE_MATH)
+ || TARGET_PARTIAL_REG_DEPENDENCY)))
return \"mov{l}\\t{%k1, %k0|%k0, %k1}\";
else
return \"mov{b}\\t{%1, %0|%0, %1}\";
}
}"
[(set (attr "type")
- (cond [(eq_attr "alternative" "3")
+ (cond [(and (eq_attr "alternative" "3")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (eq_attr "alternative" "3,5")
(const_string "imovx")
(and (ne (symbol_ref "TARGET_MOVX")
(const_int 0))
- (eq_attr "alternative" "1"))
+ (eq_attr "alternative" "2"))
(const_string "imovx")
]
(const_string "imov")))
; There's no place to override just the immediate length
(set (attr "length")
(cond [(and (eq_attr "type" "imov")
- (and (eq_attr "alternative" "2")
- (match_operand:HI 1 "immediate_operand" "")))
+ (and (match_operand:HI 1 "immediate_operand" "")
+ (eq_attr "alternative" "4")))
+ (const_string "5")
+ ;; Avoid extra dependency on partial register.
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "1")
+ (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))))
+ (const_string "5")
+ ;; Avoid partial register stalls when not using QImode arithmetic
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0)))))
(const_string "5")
]
(const_string "*")))])
@@ -1904,10 +2006,38 @@
;; On the average, pushdf using integers can be still shorter. Allow this
;; pattern for optimize_size too.
-(define_insn "*pushdf"
+(define_insn "*pushdf_nointeger"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,Fo#f,*r#f"))]
+ "!TARGET_INTEGER_DFMODE_MOVES"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ /* %%% We lose REG_DEAD notes for controlling pops if we split late. */
+ operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx);
+ operands[2] = stack_pointer_rtx;
+ operands[3] = GEN_INT (8);
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return \"sub{l}\\t{%3, %2|%2, %3}\;fstp%z0\\t%y0\";
+ else
+ return \"sub{l}\\t{%3, %2|%2, %3}\;fst%z0\\t%y0\";
+
+ case 1:
+ case 2:
+ return \"#\";
+
+ default:
+ abort ();
+ }
+}"
+ [(set_attr "type" "multi")])
+
+(define_insn "*pushdf_integer"
[(set (match_operand:DF 0 "push_operand" "=<,<")
(match_operand:DF 1 "general_no_elim_operand" "f#r,rFo#f"))]
- ""
+ "TARGET_INTEGER_DFMODE_MOVES"
"*
{
switch (which_alternative)
@@ -1955,7 +2085,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r,o")
(match_operand:DF 1 "general_operand" "fm,f,G,*roF,F*r"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
- && optimize_size
+ && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
&& (reload_in_progress || reload_completed
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], DFmode))"
@@ -2002,7 +2132,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o")
(match_operand:DF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
- && !optimize_size
+ && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
&& (reload_in_progress || reload_completed
|| GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], DFmode))"
@@ -2304,7 +2434,8 @@
else
return \"fxch\\t%0\";
}"
- [(set_attr "type" "fxch")])
+ [(set_attr "type" "fxch")
+ (set_attr "athlon_decode" "vector")])
;; Zero extension instructions
@@ -3202,6 +3333,7 @@
"TARGET_80387"
"fldcw\\t%0"
[(set_attr "length_opcode" "2")
+ (set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
;; Conversion between fixed point and floating point.
@@ -7691,6 +7823,7 @@
""
"leave"
[(set_attr "length" "1")
+ (set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
(define_expand "ffssi2"
@@ -8123,7 +8256,8 @@
(sqrt:SF (match_operand:SF 1 "register_operand" "0")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -8131,7 +8265,8 @@
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& (TARGET_IEEE_FP || flag_fast_math) "
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextendsfdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -8139,7 +8274,8 @@
(match_operand:SF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "sqrtxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -8147,7 +8283,8 @@
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& (TARGET_IEEE_FP || flag_fast_math) "
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextenddfxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -8155,7 +8292,8 @@
(match_operand:DF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextendsfxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -8163,7 +8301,8 @@
(match_operand:SF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt"
- [(set_attr "type" "fpspc")])
+ [(set_attr "type" "fpspc")
+ (set_attr "athlon_decode" "direct")])
(define_insn "sindf2"
[(set (match_operand:DF 0 "register_operand" "=f")