diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/cygwin.asm | 214 | ||||
-rw-r--r-- | gcc/config/i386/cygwin.h | 7 | ||||
-rw-r--r-- | gcc/config/i386/cygwin1.c | 42 | ||||
-rw-r--r-- | gcc/config/i386/freebsd.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 588 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 45 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 189 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 4 | ||||
-rw-r--r-- | gcc/config/i386/t-cygming | 2 | ||||
-rw-r--r-- | gcc/config/i386/t-cygwin | 4 | ||||
-rw-r--r-- | gcc/config/i386/t-interix | 2 | ||||
-rw-r--r-- | gcc/config/i386/vx-common.h | 6 |
13 files changed, 693 insertions, 418 deletions
diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm index 588c12ee701..a6cc94d160a 100644 --- a/gcc/config/i386/cygwin.asm +++ b/gcc/config/i386/cygwin.asm @@ -1,6 +1,7 @@ /* stuff needed for libgcc on win32. * - * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009 Free Software Foundation, Inc. + * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009 + * Free Software Foundation, Inc. * Written By Steve Chamberlain * * This file is free software; you can redistribute it and/or modify it @@ -23,104 +24,165 @@ * <http://www.gnu.org/licenses/>. */ -#ifdef L_chkstk +#include "auto-host.h" + +#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE + .cfi_sections .debug_frame +# define cfi_startproc() .cfi_startproc +# define cfi_endproc() .cfi_endproc +# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X +# define cfi_def_cfa_register(X) .cfi_def_cfa_register X +# define cfi_register(D,S) .cfi_register D, S +# ifdef _WIN64 +# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X +# else +# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 +# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X +# endif +#else +# define cfi_startproc() +# define cfi_endproc() +# define cfi_adjust_cfa_offset(X) +# define cfi_def_cfa_register(X) +# define cfi_register(D,S) +# define cfi_push(X) +# define cfi_pop(X) +#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ -/* Function prologue calls _alloca to probe the stack when allocating more +#ifdef L_chkstk +/* Function prologue calls __chkstk to probe the stack when allocating more than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K increments is necessary to ensure that the guard pages used by the OS virtual memory manger are allocated in correct sequence. */ .global ___chkstk .global __alloca -#ifndef _WIN64 -___chkstk: +#ifdef _WIN64 +/* __alloca is a normal function call, which uses %rcx as the argument. */ + cfi_startproc() __alloca: - pushl %ecx /* save temp */ - leal 8(%esp), %ecx /* point past return addr */ - cmpl $0x1000, %eax /* > 4k ?*/ - jb Ldone + movq %rcx, %rax + /* FALLTHRU */ -Lprobe: - subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja Lprobe /* and do it again */ +/* ___chkstk is a *special* function call, which uses %rax as the argument. + We avoid clobbering the 4 integer argument registers, %rcx, %rdx, + %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ + .align 4 +___chkstk: + popq %r11 /* pop return address */ + cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ + cfi_register(%rip, %r11) + movq %rsp, %r10 + cmpq $0x1000, %rax /* > 4k ?*/ + jb 2f -Ldone: - subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ +1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ + orl $0x0, (%r10) /* probe there */ + subq $0x1000, %rax /* decrement count */ + cmpq $0x1000, %rax + ja 1b /* and do it again */ - movl %esp, %eax /* save old stack pointer */ - movl %ecx, %esp /* decrement stack */ - movl (%eax), %ecx /* recover saved temp */ - movl 4(%eax), %eax /* recover return address */ +2: subq %rax, %r10 + movq %rsp, %rax /* hold CFA until return */ + cfi_def_cfa_register(%rax) + orl $0x0, (%r10) /* less than 4k, just peek here */ + movq %r10, %rsp /* decrement stack */ /* Push the return value back. Doing this instead of just - jumping to %eax preserves the cached call-return stack + jumping to %r11 preserves the cached call-return stack used by most modern processors. */ - pushl %eax + pushq %r11 ret + cfi_endproc() #else -/* __alloca is a normal function call, which uses %rcx as the argument. And stack space - for the argument is saved. */ + cfi_startproc() +___chkstk: __alloca: - movq %rcx, %rax - addq $0x7, %rax - andq $0xfffffffffffffff8, %rax - popq %rcx /* pop return address */ - popq %r10 /* Pop the reserved stack space. */ - movq %rsp, %r10 /* get sp */ - cmpq $0x1000, %rax /* > 4k ?*/ - jb Ldone_alloca - -Lprobe_alloca: - subq $0x1000, %r10 /* yes, move pointer down 4k*/ - orq $0x0, (%r10) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja Lprobe_alloca /* and do it again */ + pushl %ecx /* save temp */ + cfi_push(%eax) + leal 8(%esp), %ecx /* point past return addr */ + cmpl $0x1000, %eax /* > 4k ?*/ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ -Ldone_alloca: - subq %rax, %r10 - orq $0x0, (%r10) /* less than 4k, just peek here */ - movq %r10, %rax - subq $0x8, %r10 /* Reserve argument stack space. */ - movq %r10, %rsp /* decrement stack */ +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + movl %esp, %eax /* save current stack pointer */ + cfi_def_cfa_register(%eax) + movl %ecx, %esp /* decrement stack */ + movl (%eax), %ecx /* recover saved temp */ - /* Push the return value back. Doing this instead of just - jumping to %rcx preserves the cached call-return stack - used by most modern processors. */ - pushq %rcx + /* Copy the return register. Doing this instead of just jumping to + the address preserves the cached call-return stack used by most + modern processors. */ + pushl 4(%eax) ret + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk */ -/* ___chkstk is a *special* function call, which uses %rax as the argument. - We avoid clobbering the 4 integer argument registers, %rcx, %rdx, - %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ -___chkstk: - addq $0x7, %rax /* Make sure stack is on alignment of 8. */ - andq $0xfffffffffffffff8, %rax - popq %r11 /* pop return address */ - movq %rsp, %r10 /* get sp */ - cmpq $0x1000, %rax /* > 4k ?*/ - jb Ldone - -Lprobe: - subq $0x1000, %r10 /* yes, move pointer down 4k*/ - orl $0x0, (%r10) /* probe there */ +#ifdef L_chkstk_ms +/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. + We avoid clobbering any registers. Unlike ___chkstk, it just probes the + stack and does no stack allocation. */ + .global ___chkstk_ms +#ifdef _WIN64 + cfi_startproc() +___chkstk_ms: + pushq %rcx /* save temps */ + cfi_push(%rcx) + pushq %rax + cfi_push(%rax) + cmpq $0x1000, %rax /* > 4k ?*/ + leaq 24(%rsp), %rcx /* point past return addr */ + jb 2f + +1: subq $0x1000, %rcx /* yes, move pointer down 4k */ + orq $0x0, (%rcx) /* probe there */ subq $0x1000, %rax /* decrement count */ cmpq $0x1000, %rax - ja Lprobe /* and do it again */ + ja 1b /* and do it again */ -Ldone: - subq %rax, %r10 - orl $0x0, (%r10) /* less than 4k, just peek here */ - movq %r10, %rsp /* decrement stack */ +2: subq %rax, %rcx + orq $0x0, (%rcx) /* less than 4k, just peek here */ - /* Push the return value back. Doing this instead of just - jumping to %r11 preserves the cached call-return stack - used by most modern processors. */ - pushq %r11 + popq %rax + cfi_pop(%rax) + popq %rcx + cfi_pop(%rcx) + ret + cfi_endproc() +#else + cfi_startproc() +___chkstk_ms: + pushl %ecx /* save temp */ + cfi_push(%ecx) + pushl %eax + cfi_push(%eax) + cmpl $0x1000, %eax /* > 4k ?*/ + leal 12(%esp), %ecx /* point past return addr */ + jb 2f + +1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ + orl $0x0, (%ecx) /* probe there */ + subl $0x1000, %eax /* decrement count */ + cmpl $0x1000, %eax + ja 1b /* and do it again */ + +2: subl %eax, %ecx + orl $0x0, (%ecx) /* less than 4k, just peek here */ + + popl %eax + cfi_pop(%eax) + popl %ecx + cfi_pop(%ecx) ret -#endif -#endif + cfi_endproc() +#endif /* _WIN64 */ +#endif /* L_chkstk_ms */ diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h index a8d26e9558d..67308fc7dff 100644 --- a/gcc/config/i386/cygwin.h +++ b/gcc/config/i386/cygwin.h @@ -252,12 +252,13 @@ char *cvt_to_mingw[] = #undef GEN_CVT_ARRAY #endif /*GEN_CVT_ARRAY*/ -void mingw_scan (int, const char * const *, const char **); +void mingw_scan (unsigned int, const struct cl_decoded_option *, + const char **); #if 1 #define GCC_DRIVER_HOST_INITIALIZATION \ do \ { \ - mingw_scan(argc, (const char * const *) argv, &spec_machine); \ + mingw_scan (decoded_options_count, decoded_options, &spec_machine); \ } \ while (0) #else @@ -277,7 +278,7 @@ do \ add_prefix (&startfile_prefixes,\ concat (standard_startfile_prefix, "w32api", NULL),\ "GCC", PREFIX_PRIORITY_LAST, 0, NULL);\ - mingw_scan(argc, (const char * const *) argv, &spec_machine); \ + mingw_scan (decoded_options_count, decoded_options, &spec_machine); \ } \ while (0) #endif diff --git a/gcc/config/i386/cygwin1.c b/gcc/config/i386/cygwin1.c index 7de34d24b7a..99d9d8f8243 100644 --- a/gcc/config/i386/cygwin1.c +++ b/gcc/config/i386/cygwin1.c @@ -22,32 +22,42 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "tm.h" +#include "opts.h" #include <string.h> void -mingw_scan (int argc ATTRIBUTE_UNUSED, - const char *const *argv, +mingw_scan (unsigned int decoded_options_count, + const struct cl_decoded_option *decoded_options, const char **spec_machine) { + unsigned int i; putenv (xstrdup ("GCC_CYGWIN_MINGW=0")); - while (*++argv) - if (strcmp (*argv, "-mno-win32") == 0) - putenv (xstrdup ("GCC_CYGWIN_WIN32=0")); - else if (strcmp (*argv, "-mwin32") == 0) - putenv (xstrdup ("GCC_CYGWIN_WIN32=1")); - else if (strcmp (*argv, "-mno-cygwin") == 0) + for (i = 1; i < decoded_options_count; i++) + switch (decoded_options[i].opt_index) { - char *p = strstr (*spec_machine, "-cygwin"); - if (p) + case OPT_mwin32: + if (decoded_options[i].value == 0) + putenv (xstrdup ("GCC_CYGWIN_WIN32=0")); + else + putenv (xstrdup ("GCC_CYGWIN_WIN32=1")); + break; + + case OPT_mcygwin: + if (decoded_options[i].value == 0) { - int len = p - *spec_machine; - char *s = XNEWVEC (char, strlen (*spec_machine) + 3); - memcpy (s, *spec_machine, len); - strcpy (s + len, "-mingw32"); - *spec_machine = s; + char *p = strstr (*spec_machine, "-cygwin"); + if (p) + { + int len = p - *spec_machine; + char *s = XNEWVEC (char, strlen (*spec_machine) + 3); + memcpy (s, *spec_machine, len); + strcpy (s + len, "-mingw32"); + *spec_machine = s; + } + putenv (xstrdup ("GCC_CYGWIN_MINGW=1")); } - putenv (xstrdup ("GCC_CYGWIN_MINGW=1")); + break; } return; } diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h index 94b657ec295..1ec5ee0d27d 100644 --- a/gcc/config/i386/freebsd.h +++ b/gcc/config/i386/freebsd.h @@ -147,3 +147,8 @@ along with GCC; see the file COPYING3. If not see /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 + +/* Support for i386 has been removed from FreeBSD 6.0 onward. */ +#if FBSD_MAJOR >= 6 +#define SUBTARGET32_DEFAULT_CPU "i486" +#endif diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 909adb9c2b1..fd31e9917f5 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -64,8 +64,7 @@ extern bool legitimate_pic_address_disp_p (rtx); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); -extern void split_di (rtx[], int, rtx[], rtx[]); -extern void split_ti (rtx[], int, rtx[], rtx[]); +extern void split_double_mode (enum machine_mode, rtx[], int, rtx[], rtx[]); extern const char *output_set_got (rtx, rtx); extern const char *output_387_binary_op (rtx, rtx*); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dcf887502bc..627d8d20ea0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_PAD_RETURNS */ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, + /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */ + m_ATOM, + /* X86_TUNE_EXT_80387_CONSTANTS */ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC, @@ -2025,9 +2028,6 @@ static enum calling_abi ix86_function_abi (const_tree); static int ix86_tune_defaulted; static int ix86_arch_specified; -/* Bit flags that specify the ISA we are compiling for. */ -int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; - /* A mask of ix86_isa_flags that includes bit X if X was set or cleared on the command line. */ static int ix86_isa_flags_explicit; @@ -3661,7 +3661,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_one_cmpl2 = gen_one_cmpldi2; ix86_gen_monitor = gen_sse3_monitor64; ix86_gen_andsp = gen_anddi3; - ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64; + ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ix86_gen_probe_stack_range = gen_probe_stack_rangedi; } @@ -3674,7 +3674,7 @@ ix86_option_override_internal (bool main_args_p) ix86_gen_one_cmpl2 = gen_one_cmplsi2; ix86_gen_monitor = gen_sse3_monitor; ix86_gen_andsp = gen_andsi3; - ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32; + ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; ix86_gen_probe_stack_range = gen_probe_stack_rangesi; } @@ -7964,12 +7964,12 @@ ix86_code_end (void) rtx xops[2]; int regno; - for (regno = 0; regno < 8; ++regno) + for (regno = AX_REG; regno <= SP_REG; regno++) { char name[32]; tree decl; - if (! ((pic_labels_used >> regno) & 1)) + if (!(pic_labels_used & (1 << regno))) continue; get_pc_thunk_name (name, regno); @@ -8022,10 +8022,20 @@ ix86_code_end (void) /* Make sure unwind info is emitted for the thunk if needed. */ final_start_function (emit_barrier (), asm_out_file, 1); + /* Pad stack IP move with 4 instructions (two NOPs count + as one instruction). */ + if (TARGET_PAD_SHORT_FUNCTION) + { + int i = 8; + + while (i--) + fputs ("\tnop\n", asm_out_file); + } + xops[0] = gen_rtx_REG (Pmode, regno); xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); - output_asm_insn ("ret", xops); + fputs ("\tret\n", asm_out_file); final_end_function (); init_insn_lengths (); free_after_compilation (cfun); @@ -8767,9 +8777,9 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, rtx insn; if (! TARGET_64BIT) - insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset)); + insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); else if (x86_64_immediate_operand (offset, DImode)) - insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset)); + insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); else { rtx tmp; @@ -8786,10 +8796,11 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, insn = emit_insn (gen_rtx_SET (DImode, tmp, offset)); if (style < 0) RTX_FRAME_RELATED_P (insn) = 1; - insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp, - offset)); + + insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); } + insn = emit_insn (insn); if (style >= 0) ix86_add_queued_cfa_restore_notes (insn); @@ -9688,38 +9699,64 @@ ix86_expand_prologue (void) else { rtx eax = gen_rtx_REG (Pmode, AX_REG); - bool eax_live; + rtx r10 = NULL; + rtx (*adjust_stack_insn)(rtx, rtx, rtx); - if (cfun->machine->call_abi == MS_ABI) - eax_live = false; - else - eax_live = ix86_eax_live_at_start_p (); + bool eax_live = false; + bool r10_live = false; + + if (TARGET_64BIT) + r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); + if (!TARGET_64BIT_MS_ABI) + eax_live = ix86_eax_live_at_start_p (); if (eax_live) { emit_insn (gen_push (eax)); allocate -= UNITS_PER_WORD; } + if (r10_live) + { + r10 = gen_rtx_REG (Pmode, R10_REG); + emit_insn (gen_push (r10)); + allocate -= UNITS_PER_WORD; + } emit_move_insn (eax, GEN_INT (allocate)); + emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); - insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); + /* Use the fact that AX still contains ALLOCATE. */ + adjust_stack_insn = (TARGET_64BIT + ? gen_pro_epilogue_adjust_stack_di_sub + : gen_pro_epilogue_adjust_stack_si_sub); + + insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, + stack_pointer_rtx, eax)); if (m->fs.cfa_reg == stack_pointer_rtx) { m->fs.cfa_offset += allocate; - t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); - t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); - add_reg_note (insn, REG_CFA_ADJUST_CFA, t); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -allocate))); } m->fs.sp_offset += allocate; - if (eax_live) - { + if (r10_live && eax_live) + { t = choose_baseaddr (m->fs.sp_offset - allocate); + emit_move_insn (r10, gen_frame_mem (Pmode, t)); + t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD); emit_move_insn (eax, gen_frame_mem (Pmode, t)); } + else if (eax_live || r10_live) + { + t = choose_baseaddr (m->fs.sp_offset - allocate); + emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t)); + } } gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); @@ -13243,15 +13280,33 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) return true; } -/* Split one or more DImode RTL references into pairs of SImode +/* Split one or more double-mode RTL references into pairs of half-mode references. The RTL can be REG, offsettable MEM, integer constant, or - CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to split and "num" is its length. lo_half and hi_half are output arrays that parallel "operands". */ void -split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +split_double_mode (enum machine_mode mode, rtx operands[], + int num, rtx lo_half[], rtx hi_half[]) { + enum machine_mode half_mode; + unsigned int byte; + + switch (mode) + { + case TImode: + half_mode = DImode; + break; + case DImode: + half_mode = SImode; + break; + default: + gcc_unreachable (); + } + + byte = GET_MODE_SIZE (half_mode); + while (num--) { rtx op = operands[num]; @@ -13260,44 +13315,17 @@ split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) but we still have to handle it. */ if (MEM_P (op)) { - lo_half[num] = adjust_address (op, SImode, 0); - hi_half[num] = adjust_address (op, SImode, 4); + lo_half[num] = adjust_address (op, half_mode, 0); + hi_half[num] = adjust_address (op, half_mode, byte); } else { - lo_half[num] = simplify_gen_subreg (SImode, op, + lo_half[num] = simplify_gen_subreg (half_mode, op, GET_MODE (op) == VOIDmode - ? DImode : GET_MODE (op), 0); - hi_half[num] = simplify_gen_subreg (SImode, op, + ? mode : GET_MODE (op), 0); + hi_half[num] = simplify_gen_subreg (half_mode, op, GET_MODE (op) == VOIDmode - ? DImode : GET_MODE (op), 4); - } - } -} -/* Split one or more TImode RTL references into pairs of DImode - references. The RTL can be REG, offsettable MEM, integer constant, or - CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to - split and "num" is its length. lo_half and hi_half are output arrays - that parallel "operands". */ - -void -split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) -{ - while (num--) - { - rtx op = operands[num]; - - /* simplify_subreg refuse to split volatile memory addresses, but we - still have to handle it. */ - if (MEM_P (op)) - { - lo_half[num] = adjust_address (op, DImode, 0); - hi_half[num] = adjust_address (op, DImode, 8); - } - else - { - lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); - hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); + ? mode : GET_MODE (op), byte); } } } @@ -16268,9 +16296,10 @@ ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) void ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) { + enum machine_mode mode = GET_MODE (op0); rtx tmp; - switch (GET_MODE (op0)) + switch (mode) { case SFmode: case DFmode: @@ -16301,18 +16330,11 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) tmp = op0, op0 = op1, op1 = tmp; code = swap_condition (code); } - if (GET_MODE (op0) == DImode) - { - split_di (&op0, 1, lo+0, hi+0); - split_di (&op1, 1, lo+1, hi+1); - submode = SImode; - } - else - { - split_ti (&op0, 1, lo+0, hi+0); - split_ti (&op1, 1, lo+1, hi+1); - submode = DImode; - } + + split_double_mode (mode, &op0, 1, lo+0, hi+0); + split_double_mode (mode, &op1, 1, lo+1, hi+1); + + submode = mode == DImode ? SImode : DImode; /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to avoid two branches. This costs one extra insn, so disable when @@ -16469,7 +16491,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) enum machine_mode mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); - /* Do not handle DImode compares that go through special path. */ + /* Do not handle double-mode compares that go through special path. */ if (mode == (TARGET_64BIT ? TImode : DImode)) return false; @@ -17681,8 +17703,8 @@ ix86_expand_int_addcc (rtx operands[]) } -/* Split operands 0 and 1 into SImode parts. Similar to split_di, but - works for floating pointer parameters and nonoffsetable memories. +/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, + but works for floating pointer parameters and nonoffsetable memories. For pushes, it returns just stack offsets; the values will be saved in the right order. Maximally three parts are generated. */ @@ -17735,7 +17757,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) if (!TARGET_64BIT) { if (mode == DImode) - split_di (&operand, 1, &parts[0], &parts[1]); + split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); else { int i; @@ -17786,7 +17808,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) else { if (mode == TImode) - split_ti (&operand, 1, &parts[0], &parts[1]); + split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); if (mode == XFmode || mode == TFmode) { enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; @@ -17857,7 +17879,7 @@ ix86_split_long_move (rtx operands[]) /* The DFmode expanders may ask us to move double. For 64bit target this is single move. By hiding the fact here we simplify i386.md splitters. */ - if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) + if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) { /* Optimize constant pool reference to immediates. This is used by fp moves, that force all constants to memory to allow combining. */ @@ -18067,62 +18089,62 @@ ix86_split_long_move (rtx operands[]) static void ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) { - if (count == 1) + rtx (*insn)(rtx, rtx, rtx); + + if (count == 1 + || (count * ix86_cost->add <= ix86_cost->shift_const + && !optimize_insn_for_size_p ())) { - emit_insn ((mode == DImode - ? gen_addsi3 - : gen_adddi3) (operand, operand, operand)); + insn = mode == DImode ? gen_addsi3 : gen_adddi3; + while (count-- > 0) + emit_insn (insn (operand, operand, operand)); } - else if (!optimize_insn_for_size_p () - && count * ix86_cost->add <= ix86_cost->shift_const) + else { - int i; - for (i=0; i<count; i++) - { - emit_insn ((mode == DImode - ? gen_addsi3 - : gen_adddi3) (operand, operand, operand)); - } + insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3; + emit_insn (insn (operand, operand, GEN_INT (count))); } - else - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (operand, operand, GEN_INT (count))); } void ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) { + rtx (*gen_ashl3)(rtx, rtx, rtx); + rtx (*gen_shld)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + rtx low[2], high[2]; int count; - const int single_width = mode == DImode ? 32 : 64; if (CONST_INT_P (operands[2])) { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - if (count >= single_width) + if (count >= half_width) { emit_move_insn (high[0], low[1]); emit_move_insn (low[0], const0_rtx); - if (count > single_width) - ix86_expand_ashl_const (high[0], count - single_width, mode); + if (count > half_width) + ix86_expand_ashl_const (high[0], count - half_width, mode); } else { + gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shld - : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); + + emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); ix86_expand_ashl_const (low[0], count, mode); } return; } - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + split_double_mode (mode, operands, 1, low, high); + + gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; if (operands[1] == const1_rtx) { @@ -18134,7 +18156,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) ix86_expand_clear (low[0]); ix86_expand_clear (high[0]); - emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); d = gen_lowpart (QImode, low[0]); d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); @@ -18154,33 +18176,44 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) pentium4 a bit; no one else seems to care much either way. */ else { + enum machine_mode half_mode; + rtx (*gen_lshr3)(rtx, rtx, rtx); + rtx (*gen_and3)(rtx, rtx, rtx); + rtx (*gen_xor3)(rtx, rtx, rtx); + HOST_WIDE_INT bits; rtx x; + if (mode == DImode) + { + half_mode = SImode; + gen_lshr3 = gen_lshrsi3; + gen_and3 = gen_andsi3; + gen_xor3 = gen_xorsi3; + bits = 5; + } + else + { + half_mode = DImode; + gen_lshr3 = gen_lshrdi3; + gen_and3 = gen_anddi3; + gen_xor3 = gen_xordi3; + bits = 6; + } + if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) - x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); + x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); else - x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); + x = gen_lowpart (half_mode, operands[2]); emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], - GEN_INT (mode == DImode ? 5 : 6))); - emit_insn ((mode == DImode - ? gen_andsi3 - : gen_anddi3) (high[0], high[0], const1_rtx)); + emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); + emit_insn (gen_and3 (high[0], high[0], const1_rtx)); emit_move_insn (low[0], high[0]); - emit_insn ((mode == DImode - ? gen_xorsi3 - : gen_xordi3) (low[0], low[0], const1_rtx)); + emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); } - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (low[0], low[0], operands[2])); - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (high[0], high[0], operands[2])); + emit_insn (gen_ashl3 (low[0], low[0], operands[2])); + emit_insn (gen_ashl3 (high[0], high[0], operands[2])); return; } @@ -18196,176 +18229,177 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) } else { + gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); - emit_insn ((mode == DImode - ? gen_x86_shld - : gen_x86_64_shld) (high[0], low[0], operands[2])); + split_double_mode (mode, operands, 1, low, high); + emit_insn (gen_shld (high[0], low[0], operands[2])); } - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (low[0], low[0], operands[2])); + emit_insn (gen_ashl3 (low[0], low[0], operands[2])); if (TARGET_CMOVE && scratch) { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + ix86_expand_clear (scratch); - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_1 - : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2], - scratch)); + emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); } else - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_2 - : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2])); + { + rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; + + emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); + } } void ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) { + rtx (*gen_ashr3)(rtx, rtx, rtx) + = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; + rtx (*gen_shrd)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + rtx low[2], high[2]; int count; - const int single_width = mode == DImode ? 32 : 64; if (CONST_INT_P (operands[2])) { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - if (count == single_width * 2 - 1) + if (count == GET_MODE_BITSIZE (mode) - 1) { emit_move_insn (high[0], high[1]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], - GEN_INT (single_width - 1))); + emit_insn (gen_ashr3 (high[0], high[0], + GEN_INT (half_width - 1))); emit_move_insn (low[0], high[0]); } - else if (count >= single_width) + else if (count >= half_width) { emit_move_insn (low[0], high[1]); emit_move_insn (high[0], low[0]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], - GEN_INT (single_width - 1))); - if (count > single_width) - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (low[0], low[0], - GEN_INT (count - single_width))); + emit_insn (gen_ashr3 (high[0], high[0], + GEN_INT (half_width - 1))); + + if (count > half_width) + emit_insn (gen_ashr3 (low[0], low[0], + GEN_INT (count - half_width))); } else { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); + + emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); + emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); } } else { - if (!rtx_equal_p (operands[0], operands[1])) + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + split_double_mode (mode, operands, 1, low, high); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], operands[2])); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], operands[2])); + emit_insn (gen_shrd (low[0], high[0], operands[2])); + emit_insn (gen_ashr3 (high[0], high[0], operands[2])); if (TARGET_CMOVE && scratch) { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + emit_move_insn (scratch, high[0]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (scratch, scratch, - GEN_INT (single_width - 1))); - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_1 - : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2], - scratch)); + emit_insn (gen_ashr3 (scratch, scratch, + GEN_INT (half_width - 1))); + emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], + scratch)); } else - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_3 - : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2])); + { + rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; + + emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); + } } } void ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) { + rtx (*gen_lshr3)(rtx, rtx, rtx) + = mode == DImode ? gen_lshrsi3 : gen_lshrdi3; + rtx (*gen_shrd)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + rtx low[2], high[2]; int count; - const int single_width = mode == DImode ? 32 : 64; if (CONST_INT_P (operands[2])) { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - if (count >= single_width) + if (count >= half_width) { emit_move_insn (low[0], high[1]); ix86_expand_clear (high[0]); - if (count > single_width) - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (low[0], low[0], - GEN_INT (count - single_width))); + if (count > half_width) + emit_insn (gen_lshr3 (low[0], low[0], + GEN_INT (count - half_width))); } else { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); + + emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); + emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); } } else { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); + split_double_mode (mode, operands, 1, low, high); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], operands[2])); - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], operands[2])); + emit_insn (gen_shrd (low[0], high[0], operands[2])); + emit_insn (gen_lshr3 (high[0], high[0], operands[2])); - /* Heh. By reversing the arguments, we can reuse this pattern. */ if (TARGET_CMOVE && scratch) { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + ix86_expand_clear (scratch); - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_1 - : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2], - scratch)); + emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], + scratch)); } else - emit_insn ((mode == DImode - ? gen_x86_shiftsi_adj_2 - : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2])); + { + rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; + + emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); + } } } @@ -18402,10 +18436,10 @@ ix86_expand_aligntest (rtx variable, int value, bool epilogue) static void ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) { - if (GET_MODE (countreg) == DImode) - emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); - else - emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); + rtx (*gen_add)(rtx, rtx, rtx) + = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; + + emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); } /* Zero extend possibly SImode EXP to Pmode register. */ @@ -19221,7 +19255,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, { unsigned int i; enum stringop_alg alg = libcall; - for (i = 0; i < NAX_STRINGOP_ALGS; i++) + for (i = 0; i < MAX_STRINGOP_ALGS; i++) { /* We get here if the algorithms that were not libcall-based were rep-prefix based and we are unable to use rep prefixes @@ -19267,7 +19301,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, int i; bool any_alg_usable_p = true; - for (i = 0; i < NAX_STRINGOP_ALGS; i++) + for (i = 0; i < MAX_STRINGOP_ALGS; i++) { enum stringop_alg candidate = algs->size[i].alg; any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate); @@ -26139,7 +26173,7 @@ ix86_force_to_memory (enum machine_mode mode, rtx operand) case DImode: { rtx operands[2]; - split_di (&operand, 1, operands, operands + 1); + split_double_mode (mode, &operand, 1, operands, operands + 1); emit_insn ( gen_rtx_SET (VOIDmode, gen_rtx_MEM (SImode, @@ -27885,6 +27919,120 @@ ix86_pad_returns (void) } } +/* Count the minimum number of instructions in BB. Return 4 if the + number of instructions >= 4. */ + +static int +ix86_count_insn_bb (basic_block bb) +{ + rtx insn; + int insn_count = 0; + + /* Count number of instructions in this block. Return 4 if the number + of instructions >= 4. */ + FOR_BB_INSNS (bb, insn) + { + /* Only happen in exit blocks. */ + if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) == RETURN) + break; + + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + insn_count++; + if (insn_count >= 4) + return insn_count; + } + } + + return insn_count; +} + + +/* Count the minimum number of instructions in code path in BB. + Return 4 if the number of instructions >= 4. */ + +static int +ix86_count_insn (basic_block bb) +{ + edge e; + edge_iterator ei; + int min_prev_count; + + /* Only bother counting instructions along paths with no + more than 2 basic blocks between entry and exit. Given + that BB has an edge to exit, determine if a predecessor + of BB has an edge from entry. If so, compute the number + of instructions in the predecessor block. If there + happen to be multiple such blocks, compute the minimum. */ + min_prev_count = 4; + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge prev_e; + edge_iterator prev_ei; + + if (e->src == ENTRY_BLOCK_PTR) + { + min_prev_count = 0; + break; + } + FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) + { + if (prev_e->src == ENTRY_BLOCK_PTR) + { + int count = ix86_count_insn_bb (e->src); + if (count < min_prev_count) + min_prev_count = count; + break; + } + } + } + + if (min_prev_count < 4) + min_prev_count += ix86_count_insn_bb (bb); + + return min_prev_count; +} + +/* Pad short funtion to 4 instructions. */ + +static void +ix86_pad_short_function (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + rtx ret = BB_END (e->src); + if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN) + { + int insn_count = ix86_count_insn (e->src); + + /* Pad short function. */ + if (insn_count < 4) + { + rtx insn = ret; + + /* Find epilogue. */ + while (insn + && (!NOTE_P (insn) + || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) + insn = PREV_INSN (insn); + + if (!insn) + insn = ret; + + /* Two NOPs are counted as one instruction. */ + insn_count = 2 * (4 - insn_count); + emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); + } + } + } +} + /* Implement machine specific optimizations. We implement padding of returns for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ static void @@ -27892,7 +28040,9 @@ ix86_reorg (void) { if (optimize && optimize_function_for_speed_p (cfun)) { - if (TARGET_PAD_RETURNS) + if (TARGET_PAD_SHORT_FUNCTION) + ix86_pad_short_function (); + else if (TARGET_PAD_RETURNS) ix86_pad_returns (); #ifdef ASM_OUTPUT_MAX_SKIP_PAD if (TARGET_FOUR_JUMP_LIMIT) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 22dd02b0bb8..b3439bc9f0c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -90,7 +90,7 @@ enum stringop_alg unrolled_loop }; -#define NAX_STRINGOP_ALGS 4 +#define MAX_STRINGOP_ALGS 4 /* Specify what algorithm to use for stringops on known size. When size is unknown, the UNKNOWN_SIZE alg is used. When size is @@ -107,7 +107,7 @@ struct stringop_algs const struct stringop_strategy { const int max; const enum stringop_alg alg; - } size [NAX_STRINGOP_ALGS]; + } size [MAX_STRINGOP_ALGS]; }; /* Define the specific costs for a given cpu */ @@ -299,6 +299,7 @@ enum ix86_tune_indices { X86_TUNE_USE_BT, X86_TUNE_USE_INCDEC, X86_TUNE_PAD_RETURNS, + X86_TUNE_PAD_SHORT_FUNCTION, X86_TUNE_EXT_80387_CONSTANTS, X86_TUNE_SHORTEN_X87_SSE, X86_TUNE_AVOID_VECTOR_DECODE, @@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] #define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS] +#define TARGET_PAD_SHORT_FUNCTION \ + ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION] #define TARGET_EXT_80387_CONSTANTS \ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS] #define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE] @@ -671,9 +674,8 @@ enum target_cpu_default /* Width of a word, in units (bytes). */ #define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) -#ifdef IN_LIBGCC2 -#define MIN_UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) -#else + +#ifndef IN_LIBGCC2 #define MIN_UNITS_PER_WORD 4 #endif @@ -860,8 +862,8 @@ enum target_cpu_default #define STACK_REGS #define IS_STACK_MODE(MODE) \ - (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \ - || ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \ + (((MODE) == SFmode && !(TARGET_SSE && TARGET_SSE_MATH)) \ + || ((MODE) == DFmode && !(TARGET_SSE2 && TARGET_SSE_MATH)) \ || (MODE) == XFmode) /* Cover class containing the stack registers. */ @@ -976,8 +978,7 @@ enum target_cpu_default Actually there are no two word move instructions for consecutive registers. And only registers 0-3 may have mov byte instructions - applied to them. - */ + applied to them. */ #define HARD_REGNO_NREGS(REGNO, MODE) \ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \ @@ -1184,7 +1185,8 @@ enum reg_class NON_Q_REGS, /* %esi %edi %ebp %esp */ INDEX_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp */ LEGACY_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp */ - GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15*/ + GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp + %r8 %r9 %r10 %r11 %r12 %r13 %r14 %r15 */ FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */ FLOAT_REGS, SSE_FIRST_REG, @@ -1413,10 +1415,13 @@ enum reg_class /* On the 80386, this is the size of MODE in words, except in the FP regs, where a single reg is always enough. */ #define CLASS_MAX_NREGS(CLASS, MODE) \ - (!MAYBE_INTEGER_CLASS_P (CLASS) \ - ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ - : (((((MODE) == XFmode ? 12 : GET_MODE_SIZE (MODE))) \ - + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + (MAYBE_INTEGER_CLASS_P (CLASS) \ + ? ((MODE) == XFmode \ + ? (TARGET_64BIT ? 2 : 3) \ + : (MODE) == XCmode \ + ? (TARGET_64BIT ? 4 : 6) \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) \ + : (COMPLEX_MODE_P (MODE) ? 2 : 1)) /* Return a class of registers that cannot change FROM mode to TO mode. */ @@ -1750,7 +1755,7 @@ typedef struct ix86_args { /* MOVE_MAX_PIECES is the number of bytes at a time which we can move efficiently, as opposed to MOVE_MAX which is the maximum number of bytes we can move with a single instruction. */ -#define MOVE_MAX_PIECES (TARGET_64BIT ? 8 : 4) +#define MOVE_MAX_PIECES UNITS_PER_WORD /* If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, we will do a movmem or libcall instead. @@ -1995,18 +2000,12 @@ do { \ #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ ix86_output_addr_diff_elt ((FILE), (VALUE), (REL)) -/* When we see %v, we will print the 'v' prefix if TARGET_AVX is - true. */ +/* When we see %v, we will print the 'v' prefix if TARGET_AVX is true. */ #define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR) \ { \ if ((PTR)[0] == '%' && (PTR)[1] == 'v') \ - { \ - if (TARGET_AVX) \ - (PTR) += 1; \ - else \ - (PTR) += 2; \ - } \ + (PTR) += TARGET_AVX ? 1 : 2; \ } /* A C statement or statements which output an assembler instruction diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ec43793b951..c541c1485c4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -247,6 +247,7 @@ UNSPECV_LOCK UNSPECV_PROLOGUE_USE UNSPECV_CLD + UNSPECV_NOPS UNSPECV_VZEROALL UNSPECV_VZEROUPPER UNSPECV_RDTSC @@ -1639,7 +1640,7 @@ [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { - split_di (&operands[1], 1, &operands[2], &operands[3]); + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, @@ -1656,7 +1657,7 @@ [(set (match_dup 0) (match_dup 1)) (set (match_dup 2) (match_dup 3))] { - split_di (&operands[1], 1, &operands[2], &operands[3]); + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); operands[1] = gen_lowpart (DImode, operands[2]); operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, @@ -2049,7 +2050,7 @@ && !x86_64_immediate_operand (operands[1], DImode) && 1" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (&operands[0], 2, &operands[2], &operands[4]);") + "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);") (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -2060,7 +2061,7 @@ && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] - "split_di (&operands[0], 2, &operands[2], &operands[4]);") + "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);") (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" @@ -3597,7 +3598,7 @@ (zero_extend:DI (match_dup 0)))] "TARGET_64BIT" [(set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") ;; %%% Kill me once multi-word ops are sane. (define_insn "zero_extendsidi2_1" @@ -3625,7 +3626,7 @@ "!TARGET_64BIT && reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" [(set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") @@ -3635,7 +3636,7 @@ && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_insn "zero_extend<mode>di2" [(set (match_operand:DI 0 "register_operand" "=r") @@ -3800,7 +3801,7 @@ (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 4) (match_dup 1))] - "split_di (&operands[0], 1, &operands[3], &operands[4]);") + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") ;; Extend to memory case when source register does not die. (define_split @@ -3811,7 +3812,7 @@ "reload_completed" [(const_int 0)] { - split_di (&operands[0], 1, &operands[3], &operands[4]); + split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); emit_move_insn (operands[3], operands[1]); @@ -3841,7 +3842,7 @@ "reload_completed" [(const_int 0)] { - split_di (&operands[0], 1, &operands[3], &operands[4]); + split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); if (true_regnum (operands[3]) != true_regnum (operands[1])) emit_move_insn (operands[3], operands[1]); @@ -5569,7 +5570,7 @@ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);") + "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") (define_insn "*add<mode>3_cc" [(set (reg:CC FLAGS_REG) @@ -6599,7 +6600,7 @@ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 5)))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);") + "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") (define_insn "*sub<mode>_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") @@ -8592,7 +8593,7 @@ [(set (match_dup 2) (neg:DWIH (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "split_<dwi> (&operands[0], 2, &operands[0], &operands[2]);") + "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);") (define_insn "*neg<mode>2_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") @@ -10071,7 +10072,7 @@ { operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); - split_<dwi> (&operands[0], 1, &operands[4], &operands[5]); + split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]); }) (define_insn_and_split "ix86_rotr<dwi>3_doubleword" @@ -10099,7 +10100,7 @@ { operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); - split_<dwi> (&operands[0], 1, &operands[4], &operands[5]); + split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]); }) (define_insn "*<rotate_insn><mode>3_1" @@ -11465,6 +11466,25 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +;; Generate nops. Operand 0 is the number of nops, up to 8. +(define_insn "nops" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] + UNSPECV_NOPS)] + "reload_completed" +{ + int num = INTVAL (operands[0]); + + gcc_assert (num >= 1 && num <= 8); + + while (num--) + fputs ("\tnop\n", asm_out_file); + + return ""; +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + ;; Pad to 16-byte boundary, max skip in op0. Used to avoid ;; branch prediction penalty for the third jump in a 16-byte ;; block on K8. @@ -11495,7 +11515,7 @@ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - { return output_set_got (operands[0], NULL_RTX); } + "* return output_set_got (operands[0], NULL_RTX);" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -11505,7 +11525,7 @@ UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT" - { return output_set_got (operands[0], operands[1]); } + "* return output_set_got (operands[0], operands[1]);" [(set_attr "type" "multi") (set_attr "length" "12")]) @@ -14960,18 +14980,65 @@ DONE; }) -(define_expand "signbit<mode>2" +(define_expand "signbitxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + DONE; +}) + +(define_insn "movmsk_df" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH" + "%vmovmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "DF")]) + +;; Use movmskpd in SSE mode to avoid store forwarding stall +;; for 32bit targets and movq+shrq sequence for 64bit targets. +(define_expand "signbitdf2" [(use (match_operand:SI 0 "register_operand" "")) - (use (match_operand:X87MODEF 1 "register_operand" ""))] + (use (match_operand:DF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" + || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { - rtx mask = GEN_INT (0x0200); + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH) + { + emit_insn (gen_movmsk_df (operands[0], operands[1])); + emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); + } + else + { + rtx scratch = gen_reg_rtx (HImode); + emit_insn (gen_fxamdf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); + } + DONE; +}) + +(define_expand "signbitsf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)" +{ rtx scratch = gen_reg_rtx (HImode); - emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1])); - emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + emit_insn (gen_fxamsf2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, scratch), GEN_INT (0x200))); DONE; }) @@ -16021,8 +16088,8 @@ (match_dup 7) (match_dup 8)))] { - split_di (&operands[2], 2, &operands[5], &operands[7]); - split_di (&operands[0], 1, &operands[2], &operands[3]); + split_double_mode (DImode, &operands[2], 2, &operands[5], &operands[7]); + split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]); }) (define_insn "*movxfcc_1" @@ -16179,10 +16246,10 @@ ;; ;; in proper program order. -(define_insn "pro_epilogue_adjust_stack_<mode>_1" +(define_insn "pro_epilogue_adjust_stack_<mode>_add" [(set (match_operand:P 0 "register_operand" "=r,r") (plus:P (match_operand:P 1 "register_operand" "0,r") - (match_operand:P 2 "<immediate_operand>" "<i>,<i>"))) + (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>"))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] "" @@ -16222,52 +16289,24 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "pro_epilogue_adjust_stack_di_2" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (plus:DI (match_operand:DI 1 "register_operand" "0,r") - (match_operand:DI 3 "immediate_operand" "i,i"))) - (use (match_operand:DI 2 "register_operand" "r,l")) +(define_insn "pro_epilogue_adjust_stack_<mode>_sub" + [(set (match_operand:P 0 "register_operand" "=r") + (minus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r"))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))] - "TARGET_64BIT" -{ - switch (get_attr_type (insn)) - { - case TYPE_ALU: - return "add{q}\t{%2, %0|%0, %2}"; - - case TYPE_LEA: - operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]); - return "lea{q}\t{%a2, %0|%0, %a2}"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,lea") - (set_attr "mode" "DI")]) - -(define_insn "allocate_stack_worker_32" - [(set (match_operand:SI 0 "register_operand" "=a") - (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")] - UNSPECV_STACK_PROBE)) - (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && ix86_target_stack_probe ()" - "call\t___chkstk" - [(set_attr "type" "multi") - (set_attr "length" "5")]) + "" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) -(define_insn "allocate_stack_worker_64" - [(set (match_operand:DI 0 "register_operand" "=a") - (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")] +(define_insn "allocate_stack_worker_probe_<mode>" + [(set (match_operand:P 0 "register_operand" "=a") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")] UNSPECV_STACK_PROBE)) - (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1))) - (clobber (reg:DI R10_REG)) - (clobber (reg:DI R11_REG)) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_target_stack_probe ()" - "call\t___chkstk" + "ix86_target_stack_probe ()" + "call\t___chkstk_ms" [(set_attr "type" "multi") (set_attr "length" "5")]) @@ -16292,15 +16331,15 @@ } else { - rtx (*gen_allocate_stack_worker) (rtx, rtx); - + x = copy_to_mode_reg (Pmode, operands[1]); if (TARGET_64BIT) - gen_allocate_stack_worker = gen_allocate_stack_worker_64; + emit_insn (gen_allocate_stack_worker_probe_di (x, x)); else - gen_allocate_stack_worker = gen_allocate_stack_worker_32; - - x = copy_to_mode_reg (Pmode, operands[1]); - emit_insn (gen_allocate_stack_worker (x, x)); + emit_insn (gen_allocate_stack_worker_probe_si (x, x)); + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x, + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); } emit_move_insn (operands[0], virtual_stack_dynamic_rtx); diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index aa78cdfaa26..38a53f616c7 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -19,6 +19,10 @@ ; along with GCC; see the file COPYING3. If not see ; <http://www.gnu.org/licenses/>. +; Bit flags that specify the ISA we are compiling for. +Variable +int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT + ;; Definitions to add to the cl_target_option structure ;; -march= processor TargetSave diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index 0a65ffd99d9..183e545d8eb 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -17,7 +17,7 @@ # <http://www.gnu.org/licenses/>. LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk +LIB1ASMFUNCS = _chkstk _chkstk_ms # cygwin and mingw always have a limits.h, but, depending upon how we are # doing the build, it may not be installed yet. diff --git a/gcc/config/i386/t-cygwin b/gcc/config/i386/t-cygwin index af91aa5cdb6..a01219cfb9d 100644 --- a/gcc/config/i386/t-cygwin +++ b/gcc/config/i386/t-cygwin @@ -1,4 +1,4 @@ -# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009 +# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2010 # Free Software Foundation, Inc. # # This file is part of GCC. @@ -24,7 +24,7 @@ LIBGCC2_INCLUDES += -I$(srcdir)/../winsup/include \ -I$(srcdir)/../winsup/cygwin/include cygwin1.o: $(srcdir)/config/i386/cygwin1.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TM_H) $(TM_P_H) + $(TM_H) $(TM_P_H) opts.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/i386/cygwin1.c diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index 9a25831f135..30539e2e13f 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -1,5 +1,5 @@ LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk +LIB1ASMFUNCS = _chkstk _chkstk_ms winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ diff --git a/gcc/config/i386/vx-common.h b/gcc/config/i386/vx-common.h index cc7ea75ce40..f4547f08848 100644 --- a/gcc/config/i386/vx-common.h +++ b/gcc/config/i386/vx-common.h @@ -24,3 +24,9 @@ along with GCC; see the file COPYING3. If not see #define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \ ix86_solaris_return_in_memory (TYPE, FNTYPE) + +/* Provide our target specific DBX_REGISTER_NUMBER, as advertised by the + common svr4.h. VxWorks relies on the SVR4 numbering. */ + +#undef DBX_REGISTER_NUMBER +#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n] |