diff options
author | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-04-20 17:09:25 +0000 |
---|---|---|
committer | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-04-20 17:09:25 +0000 |
commit | 6df553fd97fd1d607e78cbe8e55645bb6dd6fe09 (patch) | |
tree | f3793a3a93ef77fcf4a477a298ef27298d1edd4a | |
parent | a174b54df163d395f1e7748fd70c48b4b18ef28b (diff) | |
download | gcc-6df553fd97fd1d607e78cbe8e55645bb6dd6fe09.tar.gz |
2009-04-20 Joey Ye <joey.ye@intel.com>
Xuepeng Guo <xuepeng.guo@intel.com>
H.J. Lu <hongjiu.lu@intel.com>
* config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count.
* config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro.
(IX86_LEA_PRIORITY): Likewise.
(distance_non_agu_define): New function.
(distance_agu_use): Likewise.
(ix86_lea_for_add_ok): Likewise.
(ix86_dep_by_shift_count): Likewise.
* config/i386/i386.md: Call ix86_lea_for_add_ok to decide we
should split for LEA.
* config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
function.
(ix86_dep_by_shift_count): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@146443 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/config/i386/atom.md | 25 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 310 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 7 |
5 files changed, 360 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 11704878a53..64139976679 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2009-04-20 Joey Ye <joey.ye@intel.com> + Xuepeng Guo <xuepeng.guo@intel.com> + H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count. + + * config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro. + (IX86_LEA_PRIORITY): Likewise. + (distance_non_agu_define): New function. + (distance_agu_use): Likewise. + (ix86_lea_for_add_ok): Likewise. + (ix86_dep_by_shift_count): Likewise. + + * config/i386/i386.md: Call ix86_lea_for_add_ok to decide we + should split for LEA. + + * config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new + function. + (ix86_dep_by_shift_count): Likewise. + 2009-04-20 Richard Guenther <rguenther@suse.de> * expr.c (handled_component_p): Move ... diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md index 9d5cbf24a19..a9c4c5d0576 100644 --- a/gcc/config/i386/atom.md +++ b/gcc/config/i386/atom.md @@ -768,3 +768,28 @@ atom_alu1, atom_negnot, atom_incdec, atom_ishift, atom_ishift1, atom_rotate, atom_rotate1" "atom_icmov, atom_alu_carry") + +;; lea to shift count stall is 2 cycles +(define_bypass 3 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + 
atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index d6b30781692..5d92ec79f60 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -85,6 +85,8 @@ extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode, rtx[]); extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); +extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]); +extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn); extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn); extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 503cc0816e4..5af19514f90 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13012,6 +13012,316 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, emit_move_insn (operands[0], dst); } +#define LEA_SEARCH_THRESHOLD 12 + +/* Search backward for non-agu definition of register number REGNO1 + or register number REGNO2 in INSN's basic block until + 1. Pass LEA_SEARCH_THRESHOLD instructions, or + 2. Reach BB boundary, or + 3. Reach agu definition. + Returns the distance between the non-agu definition point and INSN. + If no definition point, returns -1. 
*/ + +static int +distance_non_agu_define (unsigned int regno1, unsigned int regno2, + rtx insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + int distance = 0; + df_ref *def_rec; + enum attr_type insn_type; + + if (insn != BB_HEAD (bb)) + { + rtx prev = PREV_INSN (insn); + while (prev && distance < LEA_SEARCH_THRESHOLD) + { + if (INSN_P (prev)) + { + distance++; + for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF + && !DF_REF_IS_ARTIFICIAL (*def_rec) + && (regno1 == DF_REF_REGNO (*def_rec) + || regno2 == DF_REF_REGNO (*def_rec))) + { + insn_type = get_attr_type (prev); + if (insn_type != TYPE_LEA) + goto done; + } + } + if (prev == BB_HEAD (bb)) + break; + prev = PREV_INSN (prev); + } + } + + if (distance < LEA_SEARCH_THRESHOLD) + { + edge e; + edge_iterator ei; + bool simple_loop = false; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (e->src == bb) + { + simple_loop = true; + break; + } + + if (simple_loop) + { + rtx prev = BB_END (bb); + while (prev + && prev != insn + && distance < LEA_SEARCH_THRESHOLD) + { + if (INSN_P (prev)) + { + distance++; + for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF + && !DF_REF_IS_ARTIFICIAL (*def_rec) + && (regno1 == DF_REF_REGNO (*def_rec) + || regno2 == DF_REF_REGNO (*def_rec))) + { + insn_type = get_attr_type (prev); + if (insn_type != TYPE_LEA) + goto done; + } + } + prev = PREV_INSN (prev); + } + } + } + + distance = -1; + +done: + /* get_attr_type may modify recog data. We want to make sure + that recog data is valid for instruction INSN, on which + distance_non_agu_define is called. INSN is unchanged here. */ + extract_insn_cached (insn); + return distance; +} + +/* Return the distance between INSN and the next insn that uses + register number REGNO0 in memory address. Return -1 if no such + a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. 
*/ + +static int +distance_agu_use (unsigned int regno0, rtx insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + int distance = 0; + df_ref *def_rec; + df_ref *use_rec; + + if (insn != BB_END (bb)) + { + rtx next = NEXT_INSN (insn); + while (next && distance < LEA_SEARCH_THRESHOLD) + { + if (INSN_P (next)) + { + distance++; + + for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) + if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD + || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) + && regno0 == DF_REF_REGNO (*use_rec)) + { + /* Return DISTANCE if OP0 is used in memory + address in NEXT. */ + return distance; + } + + for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF + && !DF_REF_IS_ARTIFICIAL (*def_rec) + && regno0 == DF_REF_REGNO (*def_rec)) + { + /* Return -1 if OP0 is set in NEXT. */ + return -1; + } + } + if (next == BB_END (bb)) + break; + next = NEXT_INSN (next); + } + } + + if (distance < LEA_SEARCH_THRESHOLD) + { + edge e; + edge_iterator ei; + bool simple_loop = false; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest == bb) + { + simple_loop = true; + break; + } + + if (simple_loop) + { + rtx next = BB_HEAD (bb); + while (next + && next != insn + && distance < LEA_SEARCH_THRESHOLD) + { + if (INSN_P (next)) + { + distance++; + + for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) + if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD + || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) + && regno0 == DF_REF_REGNO (*use_rec)) + { + /* Return DISTANCE if OP0 is used in memory + address in NEXT. */ + return distance; + } + + for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF + && !DF_REF_IS_ARTIFICIAL (*def_rec) + && regno0 == DF_REF_REGNO (*def_rec)) + { + /* Return -1 if OP0 is set in NEXT. 
*/ + return -1; + } + + } + next = NEXT_INSN (next); + } + } + } + + return -1; +} + +/* Define this macro to tune LEA priority vs ADD; it takes effect when + there is a dilemma of choosing LEA or ADD. + Negative value: ADD is more preferred than LEA + Zero: Neutral + Positive value: LEA is more preferred than ADD. */ +#define IX86_LEA_PRIORITY 2 + +/* Return true if it is ok to optimize an ADD operation to LEA + operation to avoid flag register consumption. For the processors + like ATOM, if the destination register of LEA holds an actual + address which will be used soon, LEA is better and otherwise ADD + is better. */ + +bool +ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED, + rtx insn, rtx operands[]) +{ + unsigned int regno0 = true_regnum (operands[0]); + unsigned int regno1 = true_regnum (operands[1]); + unsigned int regno2; + + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return regno0 != regno1; + + regno2 = true_regnum (operands[2]); + + /* If a = b + c, (a!=b && a!=c), must use lea form. */ + if (regno0 != regno1 && regno0 != regno2) + return true; + else + { + int dist_define, dist_use; + dist_define = distance_non_agu_define (regno1, regno2, insn); + if (dist_define <= 0) + return true; + + /* If this insn has both backward non-agu dependence and forward + agu dependence, the one with the shorter distance takes effect. */ + dist_use = distance_agu_use (regno0, insn); + if (dist_use <= 0 + || (dist_define + IX86_LEA_PRIORITY) < dist_use) + return false; + + return true; + } +} + +/* Return true if destination reg of SET_BODY is shift count of + USE_BODY. */ + +static bool +ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) +{ + rtx set_dest; + rtx shift_rtx; + int i; + + /* Retrieve destination of SET_BODY. 
*/ + switch (GET_CODE (set_body)) + { + case SET: + set_dest = SET_DEST (set_body); + if (!set_dest || !REG_P (set_dest)) + return false; + break; + case PARALLEL: + for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) + if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), + use_body)) + return true; + default: + return false; + break; + } + + /* Retrieve shift count of USE_BODY. */ + switch (GET_CODE (use_body)) + { + case SET: + shift_rtx = XEXP (use_body, 1); + break; + case PARALLEL: + for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) + if (ix86_dep_by_shift_count_body (set_body, + XVECEXP (use_body, 0, i))) + return true; + default: + return false; + break; + } + + if (shift_rtx + && (GET_CODE (shift_rtx) == ASHIFT + || GET_CODE (shift_rtx) == LSHIFTRT + || GET_CODE (shift_rtx) == ASHIFTRT + || GET_CODE (shift_rtx) == ROTATE + || GET_CODE (shift_rtx) == ROTATERT)) + { + rtx shift_count = XEXP (shift_rtx, 1); + + /* Return true if shift count is dest of SET_BODY. */ + if (REG_P (shift_count) + && true_regnum (set_dest) == true_regnum (shift_count)) + return true; + } + + return false; +} + +/* Return true if destination reg of SET_INSN is shift count of + USE_INSN. */ + +bool +ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) +{ + return ix86_dep_by_shift_count_body (PATTERN (set_insn), + PATTERN (use_insn)); +} + /* Return TRUE or FALSE depending on whether the unary operator meets the appropriate constraints. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a72c1b7232e..679d38a8a89 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6245,8 +6245,8 @@ (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "TARGET_64BIT && reload_completed + && ix86_lea_for_add_ok (PLUS, insn, operands)" [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] @@ -6514,8 +6514,7 @@ (plus (match_operand 1 "register_operand" "") (match_operand 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" [(const_int 0)] { rtx pat; |