diff options
author | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-01-11 15:44:34 +0000 |
---|---|---|
committer | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-01-11 15:44:34 +0000 |
commit | b52b8962a0369f83ccd01ac16a8b73b6129a239a (patch) | |
tree | 0c27101c36aedb9f44f8d169515ece811b136363 /gcc/config/i386 | |
parent | 57e5bdcc4e1d75bca3c6e6bbba99ae94b806c9ab (diff) | |
download | gcc-b52b8962a0369f83ccd01ac16a8b73b6129a239a.tar.gz |
* i386.c (ix86_attr_length_default): Handle TYPE_STR and TYPE_CLD.
* i386.h (FIRST_PSEUDO_REGISTER): Set to 20.
(FIXED_REGISTERS): Set dirflag as fixed.
(CALL_USED_REGISTERS): Set dirflag as used.
(REG_ALLOC_ORDER): Set dirflag as last one.
(DIRFLAG_REG): New macro.
(MD_ASM_CLOBBERS): Asm clobber dirflag for backward compatibility.
(HI_REGISTER_NAMES): Add dirflag.
(DEBUG_PRINT_REG): Handle dirflag.
* i386.md (type attribute): New cld and str types.
(length_opcode attribute): Set cld and str to 1.
(memory attribute): Set str to unknown - it is not clear from the
patterns.
(pent_np function unit): Prefixed string operations take at least
12 cycles; cld takes 2 cycles.
(ppro_uops attribute): Str is "many" and cld is "few".
(ppro_p0 unit): Handle cld here.
(k6_alux unit): Handle cld and str types.
(k6_load unit): It is occupied by str opcodes.
(k6_store unit): It is occupied by str opcodes.
(athlon_decode): Str is vector decoded.
(athlon_ieu): Handle str and cld.
(cld pattern): New.
(movstrsi, clrstr, cmpstr, strlen expander): Emit cld instruction.
(movstrsi_1, clrstrsi_1, cmpstrsi_1, strlensi_1,
cmpstrsi_nz_1 insn): Do not output cld instruction.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@31326 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386.c | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 20 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 76 |
3 files changed, 76 insertions, 23 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 895cc8cc904..d35bafb3ecf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5452,6 +5452,9 @@ ix86_attr_length_default (insn) case TYPE_FCMOV: case TYPE_IBR: break; + case TYPE_STR: + case TYPE_CLD: + len = 0; case TYPE_ALU1: case TYPE_NEGNOT: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2bde60e1788..ed7f849ad42 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -610,7 +610,7 @@ extern int ix86_arch; eliminated during reloading in favor of either the stack or frame pointer. */ -#define FIRST_PSEUDO_REGISTER 19 +#define FIRST_PSEUDO_REGISTER 20 /* Number of hardware registers that go into the DWARF-2 unwind info. If not defined, equals FIRST_PSEUDO_REGISTER. */ @@ -621,8 +621,8 @@ extern int ix86_arch; and are not available for the register allocator. On the 80386, the stack pointer is such, as is the arg pointer. */ #define FIXED_REGISTERS \ -/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \ -{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } +/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \ +{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 } /* 1 for registers not available across function calls. These must include the FIXED_REGISTERS and also any @@ -632,8 +632,8 @@ extern int ix86_arch; Aside from that, you can include as many other registers as you like. */ #define CALL_USED_REGISTERS \ -/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \ -{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } +/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \ +{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } /* Order in which to allocate registers. Each register must be listed once, even those in FIXED_REGISTERS. 
List frame pointer @@ -655,8 +655,8 @@ extern int ix86_arch; generated by allocating edx first, so restore the 'natural' order of things. */ #define REG_ALLOC_ORDER \ -/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr*/ \ -{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18 } +/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr, dir*/ \ +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18, 19 } /* A C statement (sans semicolon) to choose the order in which to allocate hard registers for pseudo-registers local to a basic @@ -764,6 +764,7 @@ extern int ix86_arch; #define FLAGS_REG 17 #define FPSR_REG 18 +#define DIRFLAG_REG 19 /* Value should be nonzero if functions must have frame pointers. Zero means the frame pointer need not be set up (and parms @@ -1067,6 +1068,7 @@ enum reg_class do { \ (CLOBBERS) = tree_cons (NULL_TREE, build_string (5, "flags"), (CLOBBERS));\ (CLOBBERS) = tree_cons (NULL_TREE, build_string (4, "fpsr"), (CLOBBERS)); \ + (CLOBBERS) = tree_cons (NULL_TREE, build_string (7, "dirflag"), (CLOBBERS)); \ } while (0) /* Stack layout; function entry, exit and calling. */ @@ -2169,7 +2171,7 @@ while (0) #define HI_REGISTER_NAMES \ {"ax","dx","cx","bx","si","di","bp","sp", \ "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","", \ - "flags","fpsr" } + "flags","fpsr", "dirflag" } #define REGISTER_NAMES HI_REGISTER_NAMES @@ -2382,6 +2384,8 @@ do { long l; \ fprintf (FILE, "%d ", REGNO (X)); \ if (REGNO (X) == FLAGS_REG) \ { fputs ("flags", FILE); break; } \ + if (REGNO (X) == DIRFLAG_REG) \ + { fputs ("dirflag", FILE); break; } \ if (REGNO (X) == FPSR_REG) \ { fputs ("fpsr", FILE); break; } \ if (REGNO (X) == ARG_POINTER_REGNUM) \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f9316de4d01..6fd7c8eb32b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -83,7 +83,7 @@ ;; A basic instruction type. 
Refinements due to arguments to be ;; provided in other attributes. (define_attr "type" - "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch" + "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld" (const_string "other")) ;; The (bounding maximum) length of an instruction in bytes. @@ -105,6 +105,8 @@ (define_attr "length_opcode" "" (cond [(eq_attr "type" "imovx,setcc,icmov") (const_int 3) + (eq_attr "type" "str,cld") + (const_int 1) (and (eq_attr "type" "incdec") (ior (match_operand:SI 1 "register_operand" "") (match_operand:HI 1 "register_operand" ""))) @@ -127,9 +129,9 @@ ;; if the instruction is complex. (define_attr "memory" "none,load,store,both,unknown" - (cond [(eq_attr "type" "other,multi") + (cond [(eq_attr "type" "other,multi,str") (const_string "unknown") - (eq_attr "type" "lea,fcmov,fpspc") + (eq_attr "type" "lea,fcmov,fpspc,cld") (const_string "none") (eq_attr "type" "push") (if_then_else (match_operand 1 "memory_operand" "") @@ -260,6 +262,12 @@ (eq_attr "type" "imul")) 11 11) +;; Rep movs takes minimally 12 cycles. +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "str")) + 12 12) + ; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22 (define_function_unit "pent_np" 1 0 (and (eq_attr "cpu" "pentium") @@ -304,6 +312,11 @@ (eq_attr "memory" "store")))) 2 2) +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "cld")) + 2 2) + (define_function_unit "fpu" 1 0 (and (eq_attr "cpu" "pentium") (and (eq_attr "type" "fmov") @@ -469,9 +482,9 @@ ;; cycles to decode in decoder 0. 
(define_attr "ppro_uops" "one,few,many" - (cond [(eq_attr "type" "other,multi,call,callv,fpspc") + (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") (const_string "many") - (eq_attr "type" "icmov,fcmov") + (eq_attr "type" "icmov,fcmov,str,cld") (const_string "few") (eq_attr "type" "imov") (if_then_else (eq_attr "memory" "store,both") @@ -496,7 +509,7 @@ (define_function_unit "ppro_p0" 1 0 (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "ishift,lea,ibr")) + (eq_attr "type" "ishift,lea,ibr,cld")) 1 1) (define_function_unit "ppro_p0" 1 0 @@ -611,7 +624,7 @@ ;; Shift instructions and certain arithmetic are issued only to X pipe. (define_function_unit "k6_alux" 1 0 (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot")) + (eq_attr "type" "ishift,alu1,negnot,cld")) 1 1) ;; The QI mode arithmetic is issued to X pipe only. @@ -644,6 +657,12 @@ (eq_attr "memory" "load,both"))) 1 1) +(define_function_unit "k6_load" 1 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + 10 10) + ;; Lea have two instructions, so latency is probably 2 (define_function_unit "k6_store" 1 0 (and (eq_attr "cpu" "k6") @@ -652,6 +671,11 @@ (define_function_unit "k6_store" 1 0 (and (eq_attr "cpu" "k6") + (eq_attr "type" "str")) + 10 10) + +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") (ior (eq_attr "type" "push") (eq_attr "memory" "store,both"))) 1 1) @@ -713,7 +737,7 @@ ;; communicates with all the execution units seperately instead. 
(define_attr "athlon_decode" "direct,vector" - (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc") + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str") (const_string "vector") (and (eq_attr "type" "push") (match_operand 1 "memory_operand" "")) @@ -741,11 +765,16 @@ (define_function_unit "athlon_ieu" 3 0 (and (eq_attr "cpu" "athlon") - (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov")) + (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld")) 1 1) (define_function_unit "athlon_ieu" 3 0 (and (eq_attr "cpu" "athlon") + (eq_attr "type" "str")) + 15 15) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") (eq_attr "type" "imul")) 4 0) @@ -7789,11 +7818,18 @@ ;; Block operation instructions +(define_insn "cld" + [(set (reg:SI 19) (const_int 0))] + "" + "cld" + [(set_attr "type" "cld")]) + (define_expand "movstrsi" [(parallel [(set (match_operand:BLK 0 "memory_operand" "") (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:SI 2 "const_int_operand" "")) (use (match_operand:SI 3 "const_int_operand" "")) + (use (reg:SI 19)) (clobber (match_scratch:SI 4 "")) (clobber (match_dup 5)) (clobber (match_dup 6))])] @@ -7813,6 +7849,7 @@ operands[0] = change_address (operands[0], VOIDmode, addr0); operands[1] = change_address (operands[1], VOIDmode, addr1); + emit_insn (gen_cld ()); }") ;; It might seem that operands 0 & 1 could use predicate register_operand. 
@@ -7824,6 +7861,7 @@ (mem:BLK (match_operand:SI 1 "address_operand" "S"))) (use (match_operand:SI 2 "const_int_operand" "n")) (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI 19)) (clobber (match_scratch:SI 4 "=&c")) (clobber (match_dup 0)) (clobber (match_dup 1))] @@ -7832,7 +7870,6 @@ { rtx xops[2]; - output_asm_insn (\"cld\", operands); if (GET_CODE (operands[2]) == CONST_INT) { if (INTVAL (operands[2]) & ~0x03) @@ -7855,12 +7892,14 @@ [(set_attr "type" "multi")]) (define_expand "clrstrsi" - [(set (match_dup 3) (const_int 0)) + [(set (reg:SI 19) (const_int 0)) + (set (match_dup 3) (const_int 0)) (parallel [(set (match_operand:BLK 0 "memory_operand" "") (const_int 0)) (use (match_operand:SI 1 "const_int_operand" "")) (use (match_operand:SI 2 "const_int_operand" "")) (use (match_dup 3)) + (use (reg:SI 19)) (clobber (match_scratch:SI 4 "")) (clobber (match_dup 5))])] "" @@ -7877,6 +7916,8 @@ operands[5] = addr0; operands[0] = gen_rtx_MEM (BLKmode, addr0); + + emit_insn (gen_cld ()); }") ;; It might seem that operand 0 could use predicate register_operand. @@ -7889,6 +7930,7 @@ (use (match_operand:SI 1 "const_int_operand" "n")) (use (match_operand:SI 2 "immediate_operand" "i")) (use (match_operand:SI 3 "register_operand" "a")) + (use (reg:SI 19)) (clobber (match_scratch:SI 4 "=&c")) (clobber (match_dup 0))] "" @@ -7896,7 +7938,6 @@ { rtx xops[2]; - output_asm_insn (\"cld\", operands); if (GET_CODE (operands[1]) == CONST_INT) { unsigned int count = INTVAL (operands[1]) & 0xffffffff; @@ -7958,6 +7999,7 @@ once cc0 is dead. 
*/ align = operands[4]; + emit_insn (gen_cld ()); if (GET_CODE (count) == CONST_INT) { if (INTVAL (count) == 0) @@ -8008,11 +8050,12 @@ (mem:BLK (match_operand:SI 1 "address_operand" "D")))) (use (match_operand:SI 2 "register_operand" "c")) (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI 19)) (clobber (match_dup 0)) (clobber (match_dup 1)) (clobber (match_dup 2))] "" - "cld\;repz{\;| }cmpsb" + "repz{\;| }cmpsb" [(set_attr "type" "multi") (set_attr "length" "3")]) @@ -8026,12 +8069,13 @@ (mem:BLK (match_operand:SI 1 "address_operand" "D"))) (const_int 0))) (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI 19)) (clobber (match_dup 0)) (clobber (match_dup 1)) (clobber (match_dup 2))] "" ;; The initial compare sets the zero flag. - "cmp{l}\\t%2, %2\;cld\;repz{\;| }cmpsb" + "cmp{l}\\t%2, %2\;repz{\;| }cmpsb" [(set_attr "type" "multi") (set_attr "length" "5")]) @@ -8079,6 +8123,7 @@ emit_move_insn (scratch3, addr); + emit_insn (gen_cld ()); emit_insn (gen_strlensi_1 (scratch1, scratch3, eoschar, align, constm1_rtx)); emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); @@ -8097,10 +8142,11 @@ (match_operand:QI 2 "general_operand" "a") (match_operand:SI 3 "immediate_operand" "i") (match_operand:SI 4 "immediate_operand" "0")] 0)) + (use (reg:SI 19)) (clobber (match_dup 1)) (clobber (reg:CC 17))] "" - "cld\;repnz{\;| }scasb" + "repnz{\;| }scasb" [(set_attr "type" "multi") (set_attr "length" "3")]) |