diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2011-11-08 15:16:58 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2011-11-08 15:16:58 -0800 |
commit | 58ee1727b6279908854d0ef258465d980e591626 (patch) | |
tree | 7d8d46c991344c528d3901801378d28a53905239 | |
parent | 7f67ee605223cd73c3e487bd325a3c5d5ae7aa64 (diff) | |
download | gcc-58ee1727b6279908854d0ef258465d980e591626.tar.gz |
Properly use word_mode and Pmode.
-rw-r--r-- | gcc/ChangeLog.x32 | 51 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 127 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 185 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 4 |
4 files changed, 261 insertions, 106 deletions
diff --git a/gcc/ChangeLog.x32 b/gcc/ChangeLog.x32 index 43558cac9be..b6339e062a0 100644 --- a/gcc/ChangeLog.x32 +++ b/gcc/ChangeLog.x32 @@ -1,5 +1,56 @@ 2011-11-08 H.J. Lu <hongjiu.lu@intel.com> + * config/i386/i386.c (ix86_gen_pro_epilogue_adjust_stack): New. + (ix86_option_override_internal): Check TARGET_LP64. Set + ix86_gen_pro_epilogue_adjust_stack. Update ix86_gen_monitor. + (setup_incoming_varargs_64): Use word_mode with integer + parameters in registers. + (gen_push): Push register in word_mode instead of Pmode. + (ix86_emit_save_regs): Likewise. + (ix86_emit_save_regs_using_mov): Save integer registers in + word_mode. + (gen_pop): Pop register in word_mode instead of Pmode. + (ix86_emit_restore_regs_using_pop): Likewise. + (pro_epilogue_adjust_stack): Check TARGET_LP64. + (ix86_expand_prologue): Replace Pmode with word_mode for push + immediate. Use ix86_gen_pro_epilogue_adjust_stack. Save and + restore RAX and R10 in word_mode. + (ix86_emit_restore_regs_using_mov): Restore integer registers + in word_mode. + (ix86_expand_split_stack_prologue): Save R10_REG and restore in + word_mode. + (ix86_decompose_address): Disallow fs:(reg) if Pmode != + word_mode. + (legitimize_tls_address): Load TP into register for + TLS_MODEL_INITIAL_EXEC and TLS_MODEL_LOCAL_EXEC modes in x32. + (ix86_print_operand): Output register in DImode for 64bit + indirect branch. + (ix86_split_to_parts): Use word_mode with PUT_MODE for push. + (ix86_split_long_move): Likewise. + (ix86_zero_extend_to_Pmode): Handle Pmode != DImode. + (ix86_expand_movmem): Use word_mode for size needed for loop. + + * config/i386/i386.md (W): New. + (x86_64_mode): Likewise. + (*push<mode>2_prologue): Replace :P with :W. + (*pop<mode>1): Likewise. + (*pop<mode>1_epilogue): Likewise. + (*rep_movdi_rex64): Replace :DI with :P. Add addr32 if needed. + (*rep_stosdi_rex64): Likewise. + (*rep_movsi): Add addr32 if needed. + (*rep_movqi): Likewise. + (*rep_stossi): Likewise. + (*rep_stosqi): Likewise. + (*cmpstrnqi_nz_1): Likewise. + (*cmpstrnqi_1): Likewise. + (*strlenqi_1): Likewise. + (push/pop peephole2): Use word_mode scratch registers. + + * config/i386/sse.md (sse3_monitor64): Renamed to ... + (sse3_monitor_<x86_64_mode>): This. + +2011-11-08 H.J. Lu <hongjiu.lu@intel.com> + * longlong.h (count_leading_zeros): Use long long builtin for x86-64. (count_trailing_zeros): Likewise. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 382fabffa02..1cbf553975d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2370,6 +2370,7 @@ static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); +static rtx (*ix86_gen_pro_epilogue_adjust_stack) (rtx, rtx, rtx); /* Preferred alignment for stack boundary in bits. */ unsigned int ix86_preferred_stack_boundary; @@ -4230,28 +4231,42 @@ ix86_option_override_internal (bool main_args_p) if (TARGET_64BIT) { ix86_gen_leave = gen_leave_rex64; + if (TARGET_X32) + ix86_gen_monitor = gen_sse3_monitor_x32; + else + ix86_gen_monitor = gen_sse3_monitor_64; + } + else + { + ix86_gen_leave = gen_leave; + ix86_gen_monitor = gen_sse3_monitor; + } + + if (TARGET_LP64) + { ix86_gen_add3 = gen_adddi3; ix86_gen_sub3 = gen_subdi3; ix86_gen_sub3_carry = gen_subdi3_carry; ix86_gen_one_cmpl2 = gen_one_cmpldi2; - ix86_gen_monitor = gen_sse3_monitor64; ix86_gen_andsp = gen_anddi3; ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ix86_gen_probe_stack_range = gen_probe_stack_rangedi; + ix86_gen_pro_epilogue_adjust_stack + = gen_pro_epilogue_adjust_stack_di_sub; } else { - ix86_gen_leave = gen_leave; ix86_gen_add3 = gen_addsi3; ix86_gen_sub3 = gen_subsi3; ix86_gen_sub3_carry = gen_subsi3_carry; ix86_gen_one_cmpl2 = gen_one_cmplsi2; - ix86_gen_monitor = gen_sse3_monitor; ix86_gen_andsp = gen_andsi3; ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; ix86_gen_probe_stack_range = gen_probe_stack_rangesi; + ix86_gen_pro_epilogue_adjust_stack + = gen_pro_epilogue_adjust_stack_si_sub; } #ifdef USE_IX86_CLD @@ -7906,12 +7921,13 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) for (i = cum->regno; i < max; i++) { - mem = gen_rtx_MEM (Pmode, + mem = gen_rtx_MEM (word_mode, plus_constant (save_area, i * UNITS_PER_WORD)); MEM_NOTRAP_P (mem) = 1; set_mem_alias_set (mem, set); - emit_move_insn (mem, gen_rtx_REG (Pmode, - x86_64_int_parameter_registers[i])); + emit_move_insn (mem, + gen_rtx_REG (word_mode, + x86_64_int_parameter_registers[i])); } if (ix86_varargs_fpr_size) @@ -9014,8 +9030,11 @@ gen_push (rtx arg) m->fs.cfa_offset += UNITS_PER_WORD; m->fs.sp_offset += UNITS_PER_WORD; + if (REG_P (arg) && GET_MODE (arg) != word_mode) + arg = gen_rtx_REG (word_mode, REGNO (arg)); + return gen_rtx_SET (VOIDmode, - gen_rtx_MEM (Pmode, + gen_rtx_MEM (word_mode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)), arg); @@ -9026,9 +9045,12 @@ gen_push (rtx arg) static rtx gen_pop (rtx arg) { + if (REG_P (arg) && GET_MODE (arg) != word_mode) + arg = gen_rtx_REG (word_mode, REGNO (arg)); + return gen_rtx_SET (VOIDmode, arg, - gen_rtx_MEM (Pmode, + gen_rtx_MEM (word_mode, gen_rtx_POST_INC (Pmode, stack_pointer_rtx))); } @@ -9528,7 +9550,7 @@ ix86_emit_save_regs (void) for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) { - insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); RTX_FRAME_RELATED_P (insn) = 1; } } @@ -9608,7 +9630,7 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) { - ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset); + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); cfa_offset -= UNITS_PER_WORD; } } @@ -9682,7 +9704,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, rtx insn; bool add_frame_related_expr = false; - if (! TARGET_64BIT) + if (! TARGET_LP64) insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); else if (x86_64_immediate_operand (offset, DImode)) insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); @@ -10489,7 +10511,7 @@ ix86_expand_prologue (void) to implement macro RETURN_ADDR_RTX and intrinsic function expand_builtin_return_addr etc. */ t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD); - t = gen_frame_mem (Pmode, t); + t = gen_frame_mem (word_mode, t); insn = emit_insn (gen_push (t)); RTX_FRAME_RELATED_P (insn) = 1; @@ -10635,7 +10657,6 @@ ix86_expand_prologue (void) { rtx eax = gen_rtx_REG (Pmode, AX_REG); rtx r10 = NULL; - rtx (*adjust_stack_insn)(rtx, rtx, rtx); bool eax_live = false; bool r10_live = false; @@ -10661,12 +10682,8 @@ ix86_expand_prologue (void) emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); /* Use the fact that AX still contains ALLOCATE. */ - adjust_stack_insn = (TARGET_64BIT - ? gen_pro_epilogue_adjust_stack_di_sub - : gen_pro_epilogue_adjust_stack_si_sub); - - insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, - stack_pointer_rtx, eax)); + insn = emit_insn (ix86_gen_pro_epilogue_adjust_stack + (stack_pointer_rtx, stack_pointer_rtx, eax)); /* Note that SEH directives need to continue tracking the stack pointer even after the frame pointer has been set up. */ @@ -10686,14 +10703,18 @@ ix86_expand_prologue (void) if (r10_live && eax_live) { t = choose_baseaddr (m->fs.sp_offset - allocate); - emit_move_insn (r10, gen_frame_mem (Pmode, t)); + emit_move_insn (gen_rtx_REG (word_mode, R10_REG), + gen_frame_mem (word_mode, t)); t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD); - emit_move_insn (eax, gen_frame_mem (Pmode, t)); + emit_move_insn (gen_rtx_REG (word_mode, AX_REG), + gen_frame_mem (word_mode, t)); } else if (eax_live || r10_live) { t = choose_baseaddr (m->fs.sp_offset - allocate); - emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t)); + emit_move_insn (gen_rtx_REG (word_mode, + (eax_live ? AX_REG : R10_REG)), + gen_frame_mem (word_mode, t)); } } gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); @@ -10857,7 +10878,7 @@ ix86_emit_restore_regs_using_pop (void) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) - ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno)); + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); } /* Emit code and notes for the LEAVE instruction. */ @@ -10900,11 +10921,11 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) { - rtx reg = gen_rtx_REG (Pmode, regno); + rtx reg = gen_rtx_REG (word_mode, regno); rtx insn, mem; mem = choose_baseaddr (cfa_offset); - mem = gen_frame_mem (Pmode, mem); + mem = gen_frame_mem (word_mode, mem); insn = emit_move_insn (reg, mem); if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) @@ -11486,8 +11507,8 @@ ix86_expand_split_stack_prologue (void) { rtx rax; - rax = gen_rtx_REG (Pmode, AX_REG); - emit_move_insn (rax, reg10); + rax = gen_rtx_REG (word_mode, AX_REG); + emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); use_reg (&call_fusage, rax); } @@ -11566,8 +11587,8 @@ ix86_expand_split_stack_prologue (void) /* If we are in 64-bit mode and this function uses a static chain, we saved %r10 in %rax before calling _morestack. */ if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) - emit_move_insn (gen_rtx_REG (Pmode, R10_REG), - gen_rtx_REG (Pmode, AX_REG)); + emit_move_insn (gen_rtx_REG (word_mode, R10_REG), + gen_rtx_REG (word_mode, AX_REG)); /* If this function calls va_start, we need to store a pointer to the arguments on the old stack, because they may not have been @@ -11760,6 +11781,11 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) else disp = addr; /* displacement */ + /* Since address override works only on the (reg) part in fs:(reg), + we can't use it as memory operand. */ + if (Pmode != word_mode && seg == SEG_FS && (base || index)) + return 0; + /* Extract the integral value of scale. */ if (scale_rtx) { @@ -12797,8 +12823,19 @@ legitimize_tls_address (rtx x, enum tls_model model, int for_mov) if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); - off = force_reg (Pmode, off); - return gen_rtx_PLUS (Pmode, base, off); + if (Pmode != word_mode) + { + /* Since address override works only on the (reg) part in + fs:(reg), we can't use it as memory operand. */ + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, base); + return gen_rtx_PLUS (Pmode, reg, off); + } + else + { + off = force_reg (Pmode, off); + return gen_rtx_PLUS (Pmode, base, off); + } } else { @@ -12817,7 +12854,16 @@ legitimize_tls_address (rtx x, enum tls_model model, int for_mov) if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); - return gen_rtx_PLUS (Pmode, base, off); + if (Pmode != word_mode) + { + /* Since address override works only on the (reg) part in + fs:(reg), we can't use it as memory operand. */ + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, base); + return gen_rtx_PLUS (Pmode, reg, off); + } + else + return gen_rtx_PLUS (Pmode, base, off); } else { @@ -13922,7 +13968,8 @@ ix86_print_operand (FILE *file, rtx x, int code) gcc_unreachable (); } - ix86_print_operand (file, x, 0); + ix86_print_operand (file, x, + TARGET_64BIT && REG_P (x) ? 'q' : 0); return; @@ -19329,7 +19376,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) gcc_assert (ok); operand = copy_rtx (operand); - PUT_MODE (operand, Pmode); + PUT_MODE (operand, word_mode); parts[0] = parts[1] = parts[2] = parts[3] = operand; return size; } @@ -19482,7 +19529,7 @@ ix86_split_long_move (rtx operands[]) if (push_operand (operands[0], VOIDmode)) { operands[0] = copy_rtx (operands[0]); - PUT_MODE (operands[0], Pmode); + PUT_MODE (operands[0], word_mode); } else operands[0] = gen_lowpart (DImode, operands[0]); @@ -20043,7 +20090,11 @@ ix86_zero_extend_to_Pmode (rtx exp) if (GET_MODE (exp) == Pmode) return copy_to_mode_reg (Pmode, exp); r = gen_reg_rtx (Pmode); - emit_insn (gen_zero_extendsidi2 (r, exp)); + if (Pmode == DImode) + emit_insn (gen_zero_extendsidi2 (r, exp)); + else + emit_move_insn (r, + simplify_gen_subreg (Pmode, exp, GET_MODE (exp), 0)); return r; } @@ -21070,11 +21121,11 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, gcc_unreachable (); case loop: need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode); + size_needed = GET_MODE_SIZE (word_mode); break; case unrolled_loop: need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2); + size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2); break; case rep_prefix_8_byte: size_needed = 8; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f25a97b3ff8..44db1093a61 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -930,6 +930,14 @@ ;; This mode iterator allows :P to be used for patterns that operate on ;; pointer-sized quantities. Exactly one of the two alternatives will match. (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +;; This mode iterator allows :W to be used for patterns that operate on +;; word_mode sized quantities. +(define_mode_iterator W + [(SI "word_mode == SImode") (DI "word_mode == DImode")]) + +;; Used in x84_64 pattern name to mark x32. +(define_mode_attr x86_64_mode [(SI "x32") (DI "64")]) ;; Scheduling descriptions @@ -1730,8 +1738,8 @@ (set_attr "mode" "SI")]) (define_insn "*push<mode>2_prologue" - [(set (match_operand:P 0 "push_operand" "=<") - (match_operand:P 1 "general_no_elim_operand" "r<i>*m")) + [(set (match_operand:W 0 "push_operand" "=<") + (match_operand:W 1 "general_no_elim_operand" "r<i>*m")) (clobber (mem:BLK (scratch)))] "" "push{<imodesuffix>}\t%1" @@ -1739,16 +1747,16 @@ (set_attr "mode" "<MODE>")]) (define_insn "*pop<mode>1" - [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") - (match_operand:P 1 "pop_operand" ">"))] + [(set (match_operand:W 0 "nonimmediate_operand" "=r*m") + (match_operand:W 1 "pop_operand" ">"))] "" "pop{<imodesuffix>}\t%0" [(set_attr "type" "pop") (set_attr "mode" "<MODE>")]) (define_insn "*pop<mode>1_epilogue" - [(set (match_operand:P 0 "nonimmediate_operand" "=r*m") - (match_operand:P 1 "pop_operand" ">")) + [(set (match_operand:W 0 "nonimmediate_operand" "=r*m") + (match_operand:W 1 "pop_operand" ">")) (clobber (mem:BLK (scratch)))] "" "pop{<imodesuffix>}\t%0" @@ -15778,20 +15786,25 @@ "ix86_current_function_needs_cld = 1;") (define_insn "*rep_movdi_rex64" - [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") - (const_int 3)) - (match_operand:DI 3 "register_operand" "0"))) - (set (match_operand:DI 1 "register_operand" "=S") - (plus:DI (ashift:DI (match_dup 5) (const_int 3)) - (match_operand:DI 4 "register_operand" "1"))) + [(set (match_operand:P 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 5 "register_operand" "2") + (const_int 3)) + (match_operand:P 3 "register_operand" "0"))) + (set (match_operand:P 1 "register_operand" "=S") + (plus:P (ashift:P (match_dup 5) (const_int 3)) + (match_operand:P 4 "register_operand" "1"))) (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" - "rep{%;} movsq" +{ + if (Pmode == SImode) + return "addr32{%;} rep{%;} movsq"; + else + return "rep{%;} movsq"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -15810,7 +15823,12 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" - "rep{%;} movs{l|d}" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} rep{%;} movs{l|d}"; + else + return "rep{%;} movs{l|d}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -15827,7 +15845,12 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" - "rep{%;} movsb" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} rep{%;} movsb"; + else + return "rep{%;} movsb"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -15952,18 +15975,23 @@ "ix86_current_function_needs_cld = 1;") (define_insn "*rep_stosdi_rex64" - [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) - (set (match_operand:DI 0 "register_operand" "=D") - (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") - (const_int 3)) - (match_operand:DI 3 "register_operand" "0"))) + [(set (match_operand:P 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:P 0 "register_operand" "=D") + (plus:P (ashift:P (match_operand:P 4 "register_operand" "1") + (const_int 3)) + (match_operand:P 3 "register_operand" "0"))) (set (mem:BLK (match_dup 3)) (const_int 0)) (use (match_operand:DI 2 "register_operand" "a")) (use (match_dup 4))] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" - "rep{%;} stosq" +{ + if (Pmode == SImode) + return "addr32{%;} rep{%;} stosq"; + else + return "rep{%;} stosq"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -15980,7 +16008,12 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" - "rep{%;} stos{l|d}" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} rep{%;} stos{l|d}"; + else + return "rep{%;} stos{l|d}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -15996,7 +16029,12 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" - "rep{%;} stosb" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} rep{%;} stosb"; + else + return "rep{%;} stosb"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "store") @@ -16117,7 +16155,12 @@ (clobber (match_operand:P 1 "register_operand" "=D")) (clobber (match_operand:P 2 "register_operand" "=c"))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" - "repz{%;} cmpsb" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} repz{%;} cmpsb"; + else + return "repz{%;} cmpsb"; +} [(set_attr "type" "str") (set_attr "mode" "QI") (set (attr "prefix_rex") @@ -16157,7 +16200,12 @@ (clobber (match_operand:P 1 "register_operand" "=D")) (clobber (match_operand:P 2 "register_operand" "=c"))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" - "repz{%;} cmpsb" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} repz{%;} cmpsb"; + else + return "repz{%;} cmpsb"; +} [(set_attr "type" "str") (set_attr "mode" "QI") (set (attr "prefix_rex") @@ -16198,7 +16246,12 @@ (clobber (match_operand:P 1 "register_operand" "=D")) (clobber (reg:CC FLAGS_REG))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" - "repnz{%;} scasb" +{ + if (word_mode == DImode && Pmode == SImode) + return "addr32{%;} repnz{%;} scasb"; + else + return "repnz{%;} scasb"; +} [(set_attr "type" "str") (set_attr "mode" "QI") (set (attr "prefix_rex") @@ -17271,131 +17324,131 @@ ;; alternative when no register is available later. (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)" [(clobber (match_dup 1)) - (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1)) (clobber (mem:BLK (scratch)))])]) (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)" [(clobber (match_dup 1)) - (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) - (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1)) (clobber (mem:BLK (scratch)))])]) ;; Convert esp subtractions to push. (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)" + && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)" [(clobber (match_dup 1)) - (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) + (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)" + && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)" [(clobber (match_dup 1)) - (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1)) - (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) + (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1)) + (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))]) ;; Convert epilogue deallocator to pop. (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "(TARGET_SINGLE_POP || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" - [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)" + [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG)))) (clobber (mem:BLK (scratch)))])]) ;; Two pops case is tricky, since pop causes dependency ;; on destination register. We use two registers if available. (define_peephole2 - [(match_scratch:P 1 "r") - (match_scratch:P 2 "r") + [(match_scratch:W 1 "r") + (match_scratch:W 2 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ()) - && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" - [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)" + [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG)))) (clobber (mem:BLK (scratch)))]) - (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))]) (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] "optimize_insn_for_size_p () - && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" - [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)" + [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG)))) (clobber (mem:BLK (scratch)))]) - (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))]) ;; Convert esp additions to pop. (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)" - [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)" + [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))]) ;; Two pops case is tricky, since pop causes dependency ;; on destination register. We use two registers if available. (define_peephole2 - [(match_scratch:P 1 "r") - (match_scratch:P 2 "r") + [(match_scratch:W 1 "r") + (match_scratch:W 2 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" - [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) - (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))]) + "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)" + [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG)))) + (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))]) (define_peephole2 - [(match_scratch:P 1 "r") + [(match_scratch:W 1 "r") (parallel [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_operand:P 0 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "optimize_insn_for_size_p () - && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)" - [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG)))) - (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))]) + && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)" + [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG)))) + (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))]) ;; Convert compares with 1 to shorter inc/dec operations when CF is not ;; required and register dies. Similarly for 128 to -128. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b5b900a02a5..46a7f5fe725 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8296,8 +8296,8 @@ "monitor\t%0, %1, %2" [(set_attr "length" "3")]) -(define_insn "sse3_monitor64" - [(unspec_volatile [(match_operand:DI 0 "register_operand" "a") +(define_insn "sse3_monitor_<x86_64_mode>" + [(unspec_volatile [(match_operand:P 0 "register_operand" "a") (match_operand:SI 1 "register_operand" "c") (match_operand:SI 2 "register_operand" "d")] UNSPECV_MONITOR)] |