summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2009-04-20 17:09:25 +0000
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>2009-04-20 17:09:25 +0000
commit6df553fd97fd1d607e78cbe8e55645bb6dd6fe09 (patch)
treef3793a3a93ef77fcf4a477a298ef27298d1edd4a
parenta174b54df163d395f1e7748fd70c48b4b18ef28b (diff)
downloadgcc-6df553fd97fd1d607e78cbe8e55645bb6dd6fe09.tar.gz
2009-04-20 Joey Ye <joey.ye@intel.com>
Xuepeng Guo <xuepeng.guo@intel.com>
H.J. Lu <hongjiu.lu@intel.com>

* config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count.

* config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro.
(IX86_LEA_PRIORITY): Likewise.
(distance_non_agu_define): New function.
(distance_agu_use): Likewise.
(ix86_lea_for_add_ok): Likewise.
(ix86_dep_by_shift_count): Likewise.

* config/i386/i386.md: Call ix86_lea_for_add_ok to decide we should split for LEA.

* config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new function.
(ix86_dep_by_shift_count): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@146443 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog20
-rw-r--r--gcc/config/i386/atom.md25
-rw-r--r--gcc/config/i386/i386-protos.h2
-rw-r--r--gcc/config/i386/i386.c310
-rw-r--r--gcc/config/i386/i386.md7
5 files changed, 360 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 11704878a53..64139976679 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2009-04-20 Joey Ye <joey.ye@intel.com>
+ Xuepeng Guo <xuepeng.guo@intel.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count.
+
+ * config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro.
+ (IX86_LEA_PRIORITY): Likewise.
+ (distance_non_agu_define): New function.
+ (distance_agu_use): Likewise.
+ (ix86_lea_for_add_ok): Likewise.
+ (ix86_dep_by_shift_count): Likewise.
+
+ * config/i386/i386.md: Call ix86_lea_for_add_ok to decide we
+ should split for LEA.
+
+ * config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
+ function.
+ (ix86_dep_by_shift_count): Likewise.
+
2009-04-20 Richard Guenther <rguenther@suse.de>
* expr.c (handled_component_p): Move ...
diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md
index 9d5cbf24a19..a9c4c5d0576 100644
--- a/gcc/config/i386/atom.md
+++ b/gcc/config/i386/atom.md
@@ -768,3 +768,28 @@
atom_alu1, atom_negnot, atom_incdec, atom_ishift,
atom_ishift1, atom_rotate, atom_rotate1"
"atom_icmov, atom_alu_carry")
+
+;; lea to shift count (guard ix86_dep_by_shift_count) stall is 2 cycles
+(define_bypass 3 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
+
+;; lea to shift source (dependence not through the count) stall is 1 cycle
+(define_bypass 2 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
+ "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count (guard ix86_dep_by_shift_count) stall is 1 cycle
+(define_bypass 2 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index d6b30781692..5d92ec79f60 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -85,6 +85,8 @@ extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
extern void ix86_expand_binary_operator (enum rtx_code,
enum machine_mode, rtx[]);
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 503cc0816e4..5af19514f90 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13012,6 +13012,316 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
emit_move_insn (operands[0], dst);
}
+#define LEA_SEARCH_THRESHOLD 12 /* Max insns counted by the distance_* searches below.  */
+
+/* Search backward from INSN, within INSN's basic block, for an insn
+ that defines register REGNO1 or REGNO2 and whose type is not
+ TYPE_LEA. At most LEA_SEARCH_THRESHOLD insns are counted; if the
+ block is a self-loop the scan wraps from BB_END back toward INSN.
+ A TYPE_LEA definition is skipped over; it does not end the search.
+ Return the number of insns counted up to and including the one
+ found, or -1 if none was found. */
+
+static int
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
+ rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ enum attr_type insn_type;
+
+ if (insn != BB_HEAD (bb)) /* Nothing precedes INSN if it heads the block. */
+ {
+ rtx prev = PREV_INSN (insn);
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++; /* Only INSN_P insns count toward the distance. */
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA) /* LEA definitions are ignored. */
+ goto done;
+ }
+ }
+ if (prev == BB_HEAD (bb)) /* Do not walk out of the block. */
+ break;
+ prev = PREV_INSN (prev);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD) /* Budget left: try a self-loop wrap. */
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (e->src == bb) /* BB is one of its own predecessors. */
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx prev = BB_END (bb); /* Continue from the block end toward INSN. */
+ while (prev
+ && prev != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ goto done;
+ }
+ }
+ prev = PREV_INSN (prev);
+ }
+ }
+ }
+
+ distance = -1; /* No non-LEA definition was found. */
+
+done:
+ /* get_attr_type may modify recog data. We want to make sure
+ that recog data is valid for instruction INSN, on which
+ distance_non_agu_define is called. INSN is unchanged here. */
+ extract_insn_cached (insn);
+ return distance;
+}
+
+/* Return the number of insns from INSN to the next insn that uses
+ register REGNO0 in a memory address (self-loop blocks wrap around).
+ Return -1 if REGNO0 is set first or no such use is within range. */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ df_ref *use_rec;
+
+ if (insn != BB_END (bb)) /* Nothing follows INSN if it ends the block. */
+ {
+ rtx next = NEXT_INSN (insn);
+ while (next && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++; /* Only INSN_P insns count toward the distance. */
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if REGNO0 is used in a memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if REGNO0 is set in NEXT. */
+ return -1;
+ }
+ }
+ if (next == BB_END (bb)) /* Do not walk out of the block. */
+ break;
+ next = NEXT_INSN (next);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD) /* Budget left: try a self-loop wrap. */
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest == bb) /* BB is one of its own successors. */
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx next = BB_HEAD (bb); /* Continue from the block head toward INSN. */
+ while (next
+ && next != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if REGNO0 is used in a memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if REGNO0 is set in NEXT. */
+ return -1;
+ }
+
+ }
+ next = NEXT_INSN (next);
+ }
+ }
+ }
+
+ return -1; /* Neither a memory-address use nor a set was seen. */
+}
+
+/* Define this macro to tune LEA priority vs ADD; it takes effect when
+ there is a dilemma of choosing LEA or ADD.
+ Negative value: ADD is more preferred than LEA
+ Zero: Neutral
+ Positive value: LEA is more preferred than ADD. */
+#define IX86_LEA_PRIORITY 2
+
+/* Return true if it is ok to optimize an ADD operation to LEA
+ operation to avoid flag register consumption. For processors like
+ ATOM, LEA is better if the destination register of LEA holds an
+ address that will be used soon; otherwise ADD is better. CODE is
+ unused; OPERANDS[0] is the destination, OPERANDS[1..2] the sources. */
+
+bool
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ rtx insn, rtx operands[])
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = true_regnum (operands[1]);
+ unsigned int regno2;
+
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return regno0 != regno1; /* Here LEA is ok only when dest != first source. */
+
+ regno2 = true_regnum (operands[2]);
+
+ /* If a = b + c, (a!=b && a!=c), must use lea form. */
+ if (regno0 != regno1 && regno0 != regno2)
+ return true;
+ else
+ {
+ int dist_define, dist_use;
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
+ if (dist_define <= 0) /* No non-LEA definition of a source was found. */
+ return true;
+
+ /* If this insn has both backward non-agu dependence and forward
+ agu dependence, the one with the shorter distance takes effect. */
+ dist_use = distance_agu_use (regno0, insn);
+ if (dist_use <= 0 /* No address use of the result was found. */
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use)
+ return false;
+
+ return true;
+ }
+}
+
+/* Return true if the register set by SET_BODY is the shift/rotate
+ count of USE_BODY; each element of a PARALLEL is checked in turn. */
+
+static bool
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
+{
+ rtx set_dest;
+ rtx shift_rtx;
+ int i;
+
+ /* Retrieve destination of SET_BODY. */
+ switch (GET_CODE (set_body))
+ {
+ case SET:
+ set_dest = SET_DEST (set_body);
+ if (!set_dest || !REG_P (set_dest)) /* Only a REG destination qualifies. */
+ return false;
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
+ use_body))
+ return true;
+ default: /* Also reached from PARALLEL when no element matched. */
+ return false;
+ break;
+ }
+
+ /* Retrieve shift count of USE_BODY. */
+ switch (GET_CODE (use_body))
+ {
+ case SET:
+ shift_rtx = XEXP (use_body, 1); /* Operand 1 of the SET. */
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (set_body,
+ XVECEXP (use_body, 0, i)))
+ return true;
+ default: /* Also reached from PARALLEL when no element matched. */
+ return false;
+ break;
+ }
+
+ if (shift_rtx
+ && (GET_CODE (shift_rtx) == ASHIFT
+ || GET_CODE (shift_rtx) == LSHIFTRT
+ || GET_CODE (shift_rtx) == ASHIFTRT
+ || GET_CODE (shift_rtx) == ROTATE
+ || GET_CODE (shift_rtx) == ROTATERT))
+ {
+ rtx shift_count = XEXP (shift_rtx, 1); /* Operand 1 of the shift/rotate. */
+
+ /* Return true if shift count is dest of SET_BODY. */
+ if (REG_P (shift_count)
+ && true_regnum (set_dest) == true_regnum (shift_count))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if the destination reg of SET_INSN is the shift count
+ of USE_INSN. The check is done on the PATTERNs of the two insns. */
+
+bool
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
+{
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn),
+ PATTERN (use_insn));
+}
+
/* Return TRUE or FALSE depending on whether the unary operator meets the
appropriate constraints. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a72c1b7232e..679d38a8a89 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6245,8 +6245,8 @@
(plus:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "x86_64_nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ "TARGET_64BIT && reload_completed
+ && ix86_lea_for_add_ok (PLUS, insn, operands)"
[(set (match_dup 0)
(plus:DI (match_dup 1)
(match_dup 2)))]
@@ -6514,8 +6514,7 @@
(plus (match_operand 1 "register_operand" "")
(match_operand 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
[(const_int 0)]
{
rtx pat;