summaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.md
diff options
context:
space:
mode:
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2013-05-31 15:52:42 +0000
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2013-05-31 15:52:42 +0000
commitf3de38f1cd50845751b902a386d44bf77f0c8e2d (patch)
tree042a278a93e3dfcc22ee01de018de0c350b862dd /gcc/config/i386/i386.md
parent78d82aec87898bf527541f995939a7df94cd6b90 (diff)
downloadgcc-f3de38f1cd50845751b902a386d44bf77f0c8e2d.tar.gz
Silvermont (SLM) architecture performance tuning
2013-05-31 Yuri Rumyantsev <yuri.s.rumyantsev@intel.com> Igor Zamyatin <igor.zamyatin@intel.com> * config/i386/i386.h (enum ix86_tune_indices): Add X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS. (TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS): New define. * config/i386/i386.c (initial_ix86_tune_features) <X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS>: Initialize. (ix86_lea_outperforms): Handle Silvermont tuning. (ix86_avoid_lea_for_add): Add new argument to ix86_lea_outperforms call. (ix86_use_lea_for_mov): Likewise. (ix86_avoid_lea_for_addr): Likewise. (ix86_lea_for_add_ok): Likewise. (exact_dependency_1): New function. (exact_store_load_dependency): Likewise. (ix86_adjust_cost): Handle Silvermont tuning. (do_reoder_for_imul): Likewise. (swap_top_of_ready_list): New function. (ix86_sched_reorder): Changed to handle Silvermont tuning. * config/i386/i386.md (peepholes that split memory operand in fp converts): New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@199546 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r--gcc/config/i386/i386.md24
1 files changed, 24 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 28b0c78093a..a9b4dae43d8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3625,6 +3625,18 @@
CONST0_RTX (V4SFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (float_extend:DF
+ (match_operand:SF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF
@@ -3766,6 +3778,18 @@
CONST0_RTX (V2DFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1)))