author     hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>  2010-07-23 19:37:40 +0000
committer  hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>  2010-07-23 19:37:40 +0000
commit     10ada81fea4490f94ba2eb5923bf5baa367a38bd (patch)
tree       437dca120093cc7b1f6debf6f6b31779526c7192 /gcc/config/i386
parent     95a236de8aa10bf009e9368dfd28f95a980e5570 (diff)
parent     3bd7a983695352a99f7dd597725eb5b839d4b4cf (diff)
download   gcc-10ada81fea4490f94ba2eb5923bf5baa367a38bd.tar.gz

Merged with trunk at revision 162480.  (branch: ifunc)
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ifunc@162483 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--  gcc/config/i386/cpuid.h                 |    4
-rw-r--r--  gcc/config/i386/cygming-crtbegin.c      |    4
-rw-r--r--  gcc/config/i386/cygming-crtend.c        |    4
-rw-r--r--  gcc/config/i386/cygming.h               |    5
-rw-r--r--  gcc/config/i386/cygwin.h                |    2
-rw-r--r--  gcc/config/i386/darwin.h                |    8
-rw-r--r--  gcc/config/i386/freebsd.h               |    3
-rw-r--r--  gcc/config/i386/i386-builtin-types.def  |    6
-rw-r--r--  gcc/config/i386/i386-c.c                |    6
-rw-r--r--  gcc/config/i386/i386-protos.h           |    8
-rw-r--r--  gcc/config/i386/i386.c                  | 1732
-rw-r--r--  gcc/config/i386/i386.h                  |   27
-rw-r--r--  gcc/config/i386/i386.md                 |  517
-rw-r--r--  gcc/config/i386/i386.opt                |   12
-rw-r--r--  gcc/config/i386/immintrin.h             |  144
-rw-r--r--  gcc/config/i386/linux.h                 |    3
-rw-r--r--  gcc/config/i386/linux64.h               |    3
-rw-r--r--  gcc/config/i386/mingw32.h               |    2
-rw-r--r--  gcc/config/i386/netware.c               |    1
-rw-r--r--  gcc/config/i386/nwld.c                  |    1
-rw-r--r--  gcc/config/i386/sol2-10.h               |    4
-rw-r--r--  gcc/config/i386/sol2-unwind.h           |  113
-rw-r--r--  gcc/config/i386/sol2.h                  |    7
-rw-r--r--  gcc/config/i386/sse.md                  |   78
-rw-r--r--  gcc/config/i386/vx-common.h             |    6
-rw-r--r--  gcc/config/i386/winnt-cxx.c             |   15
-rw-r--r--  gcc/config/i386/winnt-stubs.c           |    1
-rw-r--r--  gcc/config/i386/winnt.c                 |    3
28 files changed, 1809 insertions, 910 deletions
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index a9d90a68412..11c2f1e2662 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -35,6 +35,8 @@
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
+#define bit_F16C (1 << 29)
+#define bit_RDRND (1 << 30)
/* %edx */
#define bit_CMPXCHG8B (1 << 8)
@@ -58,6 +60,8 @@
#define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1 << 31)
+/* Extended Features (%eax == 7) */
+#define bit_FSGSBASE (1 << 0)
#if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register. */
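The new cpuid.h bits can be tested directly from user code with this header's
__get_cpuid/__cpuid_count helpers; a minimal sketch (the main function and
printed strings are illustrative only):

#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Leaf 1, %ecx holds the two new feature bits above.  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    {
      printf ("F16C:     %s\n", (ecx & bit_F16C) ? "yes" : "no");
      printf ("RDRND:    %s\n", (ecx & bit_RDRND) ? "yes" : "no");
    }

  /* bit_FSGSBASE is reported by the new leaf 7 (%eax == 7), so check
     the maximum supported leaf first, then use the subleaf query.  */
  if (__get_cpuid_max (0, 0) >= 7)
    {
      __cpuid_count (7, 0, eax, ebx, ecx, edx);
      printf ("FSGSBASE: %s\n", (ebx & bit_FSGSBASE) ? "yes" : "no");
    }
  return 0;
}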
diff --git a/gcc/config/i386/cygming-crtbegin.c b/gcc/config/i386/cygming-crtbegin.c
index 367a4bbff51..73043ad0dcd 100644
--- a/gcc/config/i386/cygming-crtbegin.c
+++ b/gcc/config/i386/cygming-crtbegin.c
@@ -64,7 +64,7 @@ extern void _Jv_RegisterClasses (const void *) TARGET_ATTRIBUTE_WEAK;
register/deregister it with the exception handling library code. */
#if DWARF2_UNWIND_INFO
static EH_FRAME_SECTION_CONST char __EH_FRAME_BEGIN__[]
- __attribute__((section(EH_FRAME_SECTION_NAME), aligned(4)))
+ __attribute__((used, section(EH_FRAME_SECTION_NAME), aligned(4)))
= { };
static struct object obj;
@@ -72,7 +72,7 @@ static struct object obj;
#if TARGET_USE_JCR_SECTION
static void *__JCR_LIST__[]
- __attribute__ ((unused, section(JCR_SECTION_NAME), aligned(4)))
+ __attribute__ ((used, section(JCR_SECTION_NAME), aligned(4)))
= { };
#endif
diff --git a/gcc/config/i386/cygming-crtend.c b/gcc/config/i386/cygming-crtend.c
index 8c853bfa732..068f4b63613 100644
--- a/gcc/config/i386/cygming-crtend.c
+++ b/gcc/config/i386/cygming-crtend.c
@@ -47,7 +47,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
this would be the 'length' field in a real FDE. */
static EH_FRAME_SECTION_CONST int __FRAME_END__[]
- __attribute__ ((unused, section(EH_FRAME_SECTION_NAME),
+ __attribute__ ((used, section(EH_FRAME_SECTION_NAME),
aligned(4)))
= { 0 };
#endif
@@ -55,7 +55,7 @@ static EH_FRAME_SECTION_CONST int __FRAME_END__[]
#if TARGET_USE_JCR_SECTION
/* Null terminate the .jcr section array. */
static void *__JCR_END__[1]
- __attribute__ ((unused, section(JCR_SECTION_NAME),
+ __attribute__ ((used, section(JCR_SECTION_NAME),
aligned(sizeof(void *))))
= { 0 };
#endif
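The attribute change from "unused" to "used" is the substantive part of these
hunks: "unused" merely silences the unreferenced-variable warning, while
"used" forces the definition into the object file even though nothing in C
refers to it, which is what keeps the .eh_frame and .jcr markers from being
garbage-collected. A reduced sketch of the difference (section and symbol
names are illustrative):

/* May be omitted by section GC when nothing references it.  */
static const char dropped[]
  __attribute__ ((unused, section (".note.demo"))) = "maybe gone";

/* Always emitted, even with no C-level references.  */
static const char kept[]
  __attribute__ ((used, section (".note.demo"))) = "always kept";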
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index f2b70afa447..1587af472cc 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -276,7 +276,7 @@ do { \
i386_pe_maybe_record_exported_symbol (DECL, NAME, 0); \
if (write_symbols != SDB_DEBUG) \
i386_pe_declare_function_type (FILE, NAME, TREE_PUBLIC (DECL)); \
- ASM_OUTPUT_LABEL (FILE, NAME); \
+ ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \
} \
while (0)
@@ -429,6 +429,9 @@ do { \
#define TARGET_CXX_ADJUST_CLASS_AT_DEFINITION i386_pe_adjust_class_at_definition
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_pe_mangle_decl_assembler_name
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
#undef TREE
#ifndef BUFSIZ
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
index c365f6603cd..8a637e80ae5 100644
--- a/gcc/config/i386/cygwin.h
+++ b/gcc/config/i386/cygwin.h
@@ -301,5 +301,5 @@ while (0)
#define LIBGCC_SONAME "cyggcc_s" LIBGCC_EH_EXTN "-1.dll"
/* We should find a way to not have to update this manually. */
-#define LIBGCJ_SONAME "cyggcj" /*LIBGCC_EH_EXTN*/ "-11.dll"
+#define LIBGCJ_SONAME "cyggcj" /*LIBGCC_EH_EXTN*/ "-12.dll"
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
index d26ef2d422d..7917f2fc18f 100644
--- a/gcc/config/i386/darwin.h
+++ b/gcc/config/i386/darwin.h
@@ -60,6 +60,12 @@ along with GCC; see the file COPYING3. If not see
#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE 32
+/* Generate branch islands stubs if this is true. */
+extern int darwin_emit_branch_islands;
+
+#undef TARGET_MACHO_BRANCH_ISLANDS
+#define TARGET_MACHO_BRANCH_ISLANDS darwin_emit_branch_islands
+
#undef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD 64
@@ -225,7 +231,7 @@ along with GCC; see the file COPYING3. If not see
#undef FUNCTION_PROFILER
#define FUNCTION_PROFILER(FILE, LABELNO) \
do { \
- if (MACHOPIC_INDIRECT && !TARGET_64BIT) \
+ if (TARGET_MACHO_BRANCH_ISLANDS && MACHOPIC_INDIRECT && !TARGET_64BIT) \
{ \
const char *name = machopic_mcount_stub_name (); \
fprintf (FILE, "\tcall %s\n", name+1); /* skip '&' */ \
diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h
index 69f5e0f304c..c616a205d2c 100644
--- a/gcc/config/i386/freebsd.h
+++ b/gcc/config/i386/freebsd.h
@@ -138,3 +138,6 @@ along with GCC; see the file COPYING3. If not see
compiler get the contents of <float.h> and std::numeric_limits correct. */
#undef TARGET_96_ROUND_53_LONG_DOUBLE
#define TARGET_96_ROUND_53_LONG_DOUBLE (!TARGET_64BIT)
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
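Defining STACK_CHECK_STATIC_BUILTIN tells the middle end that -fstack-check
can rely on back-end probe sequences (output_probe_stack_range and friends,
added elsewhere in this merge) instead of generic code. Conceptually the
emitted probes walk the new frame one page at a time, something like this C
sketch (purely illustrative; the real sequence is emitted as RTL):

#define PROBE_INTERVAL 4096  /* one probe per page */

static void
probe_stack_range (volatile char *sp, long size)
{
  long off;

  /* Touch each page of the frame so the OS can extend the stack,
     or fault cleanly, before the frame is actually used.  */
  for (off = PROBE_INTERVAL; off < size; off += PROBE_INTERVAL)
    sp[-off] = 0;
  sp[-size] = 0;  /* final probe at the low end */
}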
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 10310e233cb..09dd9ebca11 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -128,6 +128,7 @@ DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (UINT16)
DEF_FUNCTION_TYPE (VOID)
DEF_FUNCTION_TYPE (PVOID)
@@ -179,6 +180,7 @@ DEF_FUNCTION_TYPE (V4SF, V4DF)
DEF_FUNCTION_TYPE (V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF)
+DEF_FUNCTION_TYPE (V4SF, V8HI)
DEF_FUNCTION_TYPE (V4SI, V16QI)
DEF_FUNCTION_TYPE (V4SI, V2DF)
DEF_FUNCTION_TYPE (V4SI, V4DF)
@@ -194,10 +196,12 @@ DEF_FUNCTION_TYPE (V8SF, PCV4SF)
DEF_FUNCTION_TYPE (V8SF, V4SF)
DEF_FUNCTION_TYPE (V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SI)
+DEF_FUNCTION_TYPE (V8SF, V8HI)
DEF_FUNCTION_TYPE (V8SI, V4SI)
DEF_FUNCTION_TYPE (V8SI, V8SF)
DEF_FUNCTION_TYPE (VOID, PCVOID)
DEF_FUNCTION_TYPE (VOID, PVOID)
+DEF_FUNCTION_TYPE (VOID, UINT64)
DEF_FUNCTION_TYPE (VOID, UNSIGNED)
DEF_FUNCTION_TYPE (DI, V2DI, INT)
@@ -282,6 +286,8 @@ DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, INT)
DEF_FUNCTION_TYPE (V8HI, V8HI, SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V8HI, V8SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V4SF, INT)
DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 1b89a0b428f..c03c8515bc3 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -246,6 +246,12 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__ABM__");
if (isa_flag & OPTION_MASK_ISA_POPCNT)
def_or_undef (parse_in, "__POPCNT__");
+ if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
+ def_or_undef (parse_in, "__FSGSBASE__");
+ if (isa_flag & OPTION_MASK_ISA_RDRND)
+ def_or_undef (parse_in, "__RDRND__");
+ if (isa_flag & OPTION_MASK_ISA_F16C)
+ def_or_undef (parse_in, "__F16C__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
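With these hooks, code compiled with -mfsgsbase, -mrdrnd or -mf16c can detect
the ISA at preprocessing time. A minimal sketch keyed on __RDRND__ (the
inline asm is illustrative; real code would use the immintrin.h intrinsics
added in this merge and retry while the carry flag reports failure):

unsigned int
get_hw_random (void)
{
#ifdef __RDRND__
  unsigned int r;
  __asm__ __volatile__ ("rdrand %0" : "=r" (r) : : "cc");
  return r;
#else
  return 0;  /* no hardware RNG selected at compile time */
#endif
}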
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4a0e3062212..23938b882c6 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -24,6 +24,7 @@ extern void override_options (bool);
extern void optimization_options (int, int);
extern void ix86_conditional_register_usage (void);
+extern bool ix86_target_stack_probe (void);
extern int ix86_can_use_return_insn_p (void);
extern void ix86_setup_frame_addresses (void);
@@ -71,6 +72,8 @@ extern const char *output_387_binary_op (rtx, rtx*);
extern const char *output_387_reg_move (rtx, rtx*);
extern const char *output_fix_trunc (rtx, rtx*, int);
extern const char *output_fp_compare (rtx, rtx*, int, int);
+extern const char *output_adjust_stack_and_probe (rtx);
+extern const char *output_probe_stack_range (rtx, rtx);
extern void ix86_expand_clear (rtx);
extern void ix86_expand_move (enum machine_mode, rtx[]);
@@ -136,8 +139,9 @@ extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
extern rtx ix86_libcall_value (enum machine_mode);
extern bool ix86_function_arg_regno_p (int);
-extern int ix86_function_arg_boundary (enum machine_mode, tree);
-extern bool ix86_sol10_return_in_memory (const_tree,const_tree);
+extern void ix86_asm_output_function_label (FILE *, const char *, tree);
+extern int ix86_function_arg_boundary (enum machine_mode, const_tree);
+extern bool ix86_solaris_return_in_memory (const_tree, const_tree);
extern rtx ix86_force_to_memory (enum machine_mode, rtx);
extern void ix86_free_from_memory (enum machine_mode);
extern enum calling_abi ix86_cfun_abi (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4f5afbec0a1..596a6db22ad 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "recog.h"
#include "expr.h"
#include "optabs.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
@@ -82,21 +83,21 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* variable shift costs */
COSTS_N_BYTES (3), /* constant shift costs */
{COSTS_N_BYTES (3), /* cost of starting multiply for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
COSTS_N_BYTES (3), /* cost of movsx */
COSTS_N_BYTES (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -132,17 +133,17 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 1, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 1, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Processor costs (relative to an add) */
@@ -153,21 +154,21 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* constant shift costs */
{COSTS_N_INSNS (6), /* cost of starting multiply for QI */
- COSTS_N_INSNS (6), /* HI */
- COSTS_N_INSNS (6), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
COSTS_N_INSNS (1), /* cost of multiply per each bit set */
{COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (23), /* SI */
- COSTS_N_INSNS (23), /* DI */
- COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -203,17 +204,17 @@ struct processor_costs i386_cost = { /* 386 specific costs */
DUMMY_STRINGOP_ALGS},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -223,21 +224,21 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* constant shift costs */
{COSTS_N_INSNS (12), /* cost of starting multiply for QI */
- COSTS_N_INSNS (12), /* HI */
- COSTS_N_INSNS (12), /* SI */
- COSTS_N_INSNS (12), /* DI */
- COSTS_N_INSNS (12)}, /* other */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
1, /* cost of multiply per each bit set */
{COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (40), /* HI */
- COSTS_N_INSNS (40), /* SI */
- COSTS_N_INSNS (40), /* DI */
- COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -275,17 +276,17 @@ struct processor_costs i486_cost = { /* 486 specific costs */
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -295,21 +296,21 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (4), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (11), /* cost of starting multiply for QI */
- COSTS_N_INSNS (11), /* HI */
- COSTS_N_INSNS (11), /* SI */
- COSTS_N_INSNS (11), /* DI */
- COSTS_N_INSNS (11)}, /* other */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (25), /* HI */
- COSTS_N_INSNS (25), /* SI */
- COSTS_N_INSNS (25), /* DI */
- COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -345,17 +346,17 @@ struct processor_costs pentium_cost = {
DUMMY_STRINGOP_ALGS},
{{libcall, {{-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -365,21 +366,21 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (17), /* HI */
- COSTS_N_INSNS (17), /* SI */
- COSTS_N_INSNS (17), /* DI */
- COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -411,28 +412,28 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
- /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
- the alignment). For small blocks inline loop is still a noticeable win, for bigger
- blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
- more expensive startup time in CPU, but after 4K the difference is down in the noise.
- */
+ /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
+ (we ensure the alignment). For small blocks inline loop is still a
+ noticeable win, for bigger blocks either rep movsl or rep movsb is
+ way to go. Rep movsb has apparently more expensive startup time in CPU,
+ but after 4K the difference is down in the noise. */
{{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{1024, unrolled_loop},
- {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ {8192, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -442,21 +443,21 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (2), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (7), /* SI */
- COSTS_N_INSNS (7), /* DI */
- COSTS_N_INSNS (7)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (39), /* SI */
- COSTS_N_INSNS (39), /* DI */
- COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
- 1, /* cost for loading QImode using movzbl */
+ 1, /* cost for loading QImode using movzbl */
{1, 1, 1}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -493,17 +494,17 @@ struct processor_costs geode_cost = {
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -513,21 +514,21 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (18), /* HI */
- COSTS_N_INSNS (18), /* SI */
- COSTS_N_INSNS (18), /* DI */
- COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
COSTS_N_INSNS (2), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
- 3, /* cost for loading QImode using movzbl */
+ 3, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -566,17 +567,17 @@ struct processor_costs k6_cost = {
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -586,21 +587,21 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (5), /* cost of starting multiply for QI */
- COSTS_N_INSNS (5), /* HI */
- COSTS_N_INSNS (5), /* SI */
- COSTS_N_INSNS (5), /* DI */
- COSTS_N_INSNS (5)}, /* other */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -639,17 +640,17 @@ struct processor_costs athlon_cost = {
DUMMY_STRINGOP_ALGS},
{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -659,21 +660,21 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -710,48 +711,48 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- /* K8 has optimized REP instruction for medium sized blocks, but for very small
- blocks it is better to use loop. For large blocks, libcall can do
- nontemporary accesses and beat inline considerably. */
+ /* K8 has optimized REP instruction for medium sized blocks, but for very
+ small blocks it is better to use loop. For large blocks, libcall can
+ do nontemporary accesses and beat inline considerably. */
{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 5, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 3, /* vec_unalign_load_cost. */
- 3, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -772,14 +773,14 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
- /* On K8
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
64, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */
@@ -805,40 +806,40 @@ struct processor_costs amdfam10_cost = {
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
struct processor_costs bdver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -859,14 +860,14 @@ struct processor_costs bdver1_cost = {
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
- /* On K8
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
64, /* size of l1 cache. */
1024, /* size of l2 cache. */
64, /* size of prefetch block */
@@ -885,24 +886,24 @@ struct processor_costs bdver1_cost = {
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
- very small blocks it is better to use loop. For large blocks, libcall can
- do nontemporary accesses and beat inline considerably. */
+ very small blocks it is better to use loop. For large blocks, libcall
+ can do nontemporary accesses and beat inline considerably. */
{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -912,21 +913,21 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (4), /* variable shift costs */
COSTS_N_INSNS (4), /* constant shift costs */
{COSTS_N_INSNS (15), /* cost of starting multiply for QI */
- COSTS_N_INSNS (15), /* HI */
- COSTS_N_INSNS (15), /* SI */
- COSTS_N_INSNS (15), /* DI */
- COSTS_N_INSNS (15)}, /* other */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (56), /* HI */
- COSTS_N_INSNS (56), /* SI */
- COSTS_N_INSNS (56), /* DI */
- COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -963,17 +964,17 @@ struct processor_costs pentium4_cost = {
{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -983,21 +984,21 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (10), /* cost of starting multiply for QI */
- COSTS_N_INSNS (10), /* HI */
- COSTS_N_INSNS (10), /* SI */
- COSTS_N_INSNS (10), /* DI */
- COSTS_N_INSNS (10)}, /* other */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (66), /* HI */
- COSTS_N_INSNS (66), /* SI */
- COSTS_N_INSNS (66), /* DI */
- COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -1036,17 +1037,17 @@ struct processor_costs nocona_cost = {
{-1, libcall}}},
{libcall, {{24, loop}, {64, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -1056,21 +1057,21 @@ struct processor_costs core2_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (22), /* HI */
- COSTS_N_INSNS (22), /* SI */
- COSTS_N_INSNS (22), /* DI */
- COSTS_N_INSNS (22)}, /* other */
+ COSTS_N_INSNS (22), /* HI */
+ COSTS_N_INSNS (22), /* SI */
+ COSTS_N_INSNS (22), /* DI */
+ COSTS_N_INSNS (22)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
16, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -1109,17 +1110,17 @@ struct processor_costs core2_cost = {
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{24, loop}, {32, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
@@ -1129,21 +1130,21 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -1177,22 +1178,22 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
{{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{32, loop}, {64, rep_prefix_4_byte},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {15, unrolled_loop},
- {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{24, loop}, {32, unrolled_loop},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Generic64 should produce code tuned for Nocona and K8. */
@@ -1207,21 +1208,21 @@ struct processor_costs generic64_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -1246,8 +1247,8 @@ struct processor_costs generic64_cost = {
512, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
- /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
- is increased to perhaps more appropriate value of 5. */
+ /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
+ value is increased to perhaps more appropriate value of 5. */
3, /* Branch cost */
COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (8), /* cost of FMUL instruction. */
@@ -1259,20 +1260,21 @@ struct processor_costs generic64_cost = {
{libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{DUMMY_STRINGOP_ALGS,
{libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
-/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
+/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
+ Athlon and K8. */
static const
struct processor_costs generic32_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
@@ -1280,21 +1282,21 @@ struct processor_costs generic32_cost = {
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
@@ -1330,17 +1332,17 @@ struct processor_costs generic32_cost = {
DUMMY_STRINGOP_ALGS},
{{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
@@ -1906,6 +1908,9 @@ static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
+static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
+static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
+static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
@@ -2080,6 +2085,11 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
+#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_SET \
+ (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
+
/* Define a set of ISAs which aren't available when a given ISA is
disabled. MMX and SSE ISAs are handled separately. */
@@ -2105,7 +2115,7 @@ static int ix86_isa_flags_explicit;
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
(OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
- | OPTION_MASK_ISA_FMA4_UNSET)
+ | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
@@ -2129,8 +2139,13 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
+#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
+
/* Vectorization library interface and handlers. */
-tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
+
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
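The SET/UNSET pairs above are deliberately asymmetric: OPTION_MASK_ISA_F16C_SET
pulls in AVX because -mf16c is useless without it, while
OPTION_MASK_ISA_AVX_UNSET now includes F16C so that -mno-avx also turns F16C
off; -mno-f16c, by contrast, leaves AVX alone. A standalone sketch of that
closure logic (mask names shortened, bit values illustrative):

#include <stdio.h>

#define ISA_AVX        (1 << 0)
#define ISA_F16C       (1 << 1)
#define ISA_F16C_SET   (ISA_F16C | ISA_AVX)  /* -mf16c implies AVX */
#define ISA_F16C_UNSET ISA_F16C              /* -mno-f16c is local */
#define ISA_AVX_UNSET  (ISA_AVX | ISA_F16C)  /* -mno-avx kills F16C too */

int
main (void)
{
  int isa = 0;

  isa |= ISA_F16C_SET;    /* -mf16c */
  printf ("after -mf16c:   avx=%d f16c=%d\n",
          !!(isa & ISA_AVX), !!(isa & ISA_F16C));

  isa &= ~ISA_AVX_UNSET;  /* -mno-avx */
  printf ("after -mno-avx: avx=%d f16c=%d\n",
          !!(isa & ISA_AVX), !!(isa & ISA_F16C));
  return 0;
}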
@@ -2498,6 +2513,45 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
}
return true;
+ case OPT_mfsgsbase:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
+ }
+ return true;
+
+ case OPT_mrdrnd:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
+ }
+ return true;
+
+ case OPT_mf16c:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
+ }
+ return true;
+
default:
return true;
}
@@ -2541,6 +2595,9 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{ "-mcrc32", OPTION_MASK_ISA_CRC32 },
{ "-maes", OPTION_MASK_ISA_AES },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
+ { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
+ { "-mrdrnd", OPTION_MASK_ISA_RDRND },
+ { "-mf16c", OPTION_MASK_ISA_F16C },
};
/* Flag options. */
@@ -2778,7 +2835,10 @@ override_options (bool main_args_p)
PTA_MOVBE = 1 << 20,
PTA_FMA4 = 1 << 21,
PTA_XOP = 1 << 22,
- PTA_LWP = 1 << 23
+ PTA_LWP = 1 << 23,
+ PTA_FSGSBASE = 1 << 24,
+ PTA_RDRND = 1 << 25,
+ PTA_F16C = 1 << 26
};
static struct pta
@@ -3154,6 +3214,15 @@ override_options (bool main_args_p)
if (processor_alias_table[i].flags & PTA_PCLMUL
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
+ if (processor_alias_table[i].flags & PTA_FSGSBASE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
+ if (processor_alias_table[i].flags & PTA_RDRND
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
+ if (processor_alias_table[i].flags & PTA_F16C
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@@ -3577,6 +3646,9 @@ override_options (bool main_args_p)
ix86_gen_one_cmpl2 = gen_one_cmpldi2;
ix86_gen_monitor = gen_sse3_monitor64;
ix86_gen_andsp = gen_anddi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
}
else
{
@@ -3588,6 +3660,9 @@ override_options (bool main_args_p)
ix86_gen_one_cmpl2 = gen_one_cmplsi2;
ix86_gen_monitor = gen_sse3_monitor;
ix86_gen_andsp = gen_andsi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
}
#ifdef USE_IX86_CLD
@@ -3826,6 +3901,9 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
IX86_ATTR_ISA ("fma4", OPT_mfma4),
IX86_ATTR_ISA ("xop", OPT_mxop),
IX86_ATTR_ISA ("lwp", OPT_mlwp),
+ IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
+ IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
+ IX86_ATTR_ISA ("f16c", OPT_mf16c),
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
@@ -4444,6 +4522,20 @@ optimization_options (int level, int size ATTRIBUTE_UNUSED)
SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
+
+/* Decide whether we must probe the stack before any space allocation
+   on this target.  It's essentially TARGET_STACK_PROBE, except when
+   -fstack-check already probes the stack by other means.  */
+
+bool
+ix86_target_stack_probe (void)
+{
+ /* Do not probe the stack twice if static stack checking is enabled. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ return false;
+
+ return TARGET_STACK_PROBE;
+}
/* Decide whether we can make a sibling call to a function. DECL is the
declaration of the function being targeted by the call and EXP is the
@@ -5009,18 +5101,15 @@ ix86_function_type_abi (const_tree fntype)
static bool
ix86_function_ms_hook_prologue (const_tree fntype)
{
- if (!TARGET_64BIT)
+ if (fntype && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
{
- if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
- {
- if (decl_function_context (fntype) != NULL_TREE)
- {
- error_at (DECL_SOURCE_LOCATION (fntype),
- "ms_hook_prologue is not compatible with nested function");
- }
+ if (decl_function_context (fntype) != NULL_TREE)
+ {
+ error_at (DECL_SOURCE_LOCATION (fntype),
+ "ms_hook_prologue is not compatible with nested function");
+ }
- return true;
- }
+ return true;
}
return false;
}
@@ -5043,6 +5132,40 @@ ix86_cfun_abi (void)
return cfun->machine->call_abi;
}
+/* Write the extra assembler code needed to declare a function properly. */
+
+void
+ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
+ tree decl)
+{
+ bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
+
+ if (is_ms_hook)
+ {
+ int i, filler_count = (TARGET_64BIT ? 32 : 16);
+ unsigned int filler_cc = 0xcccccccc;
+
+ for (i = 0; i < filler_count; i += 4)
+ fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
+ }
+
+ ASM_OUTPUT_LABEL (asm_out_file, fname);
+
+  /* Output the magic byte marker if the hot-patch attribute is set.
+     For the ia32 case, the frame-pointer prologue is emitted in
+     ix86_expand_prologue.  */
+ if (is_ms_hook)
+ {
+ if (TARGET_64BIT)
+	/* leaq 0(%rsp), %rsp  */
+ asm_fprintf (asm_out_file, ASM_BYTE
+ "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
+ else
+ /* movl.s %edi, %edi. */
+ asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
+ }
+}
+
/* regclass.c */
extern void init_regs (void);
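Concretely, for a 32-bit function carrying the ms_hook_prologue attribute, ix86_asm_output_function_label produces a layout along these lines (a sketch; the exact directives depend on what ASM_LONG and ASM_BYTE expand to for the target assembler):

/* Hypothetical input: */
__attribute__ ((ms_hook_prologue)) void hookable (void);

/* Expected assembly shape on ia32 (sketch):

       .long 0xcccccccc        16 bytes of 0xCC filler before the label
       .long 0xcccccccc
       .long 0xcccccccc
       .long 0xcccccccc
   _hookable:
       .byte 0x8b, 0xff        movl.s %edi, %edi -- 2-byte patchable NOP;
                               push %ebp / movl.s %esp, %ebp follow from
                               ix86_expand_prologue  */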
@@ -5352,7 +5475,7 @@ classify_argument (enum machine_mode mode, const_tree type,
{
case RECORD_TYPE:
/* And now merge the fields of structure. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
@@ -5440,7 +5563,7 @@ classify_argument (enum machine_mode mode, const_tree type,
case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but offset is always 0.
*/
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
@@ -6034,9 +6157,8 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
if (!named && VALID_AVX256_REG_MODE (mode))
return;
- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
- cum->words += words;
- else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
+ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
{
cum->nregs -= int_nregs;
cum->sse_nregs -= sse_nregs;
@@ -6044,7 +6166,11 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
cum->sse_regno += sse_nregs;
}
else
- cum->words += words;
+ {
+ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
+ cum->words = (cum->words + align - 1) & ~(align - 1);
+ cum->words += words;
+ }
}
static void
@@ -6385,7 +6511,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
ABI. */
static bool
-contains_aligned_value_p (tree type)
+contains_aligned_value_p (const_tree type)
{
enum machine_mode mode = TYPE_MODE (type);
if (((TARGET_SSE && SSE_REG_MODE_P (mode))
@@ -6409,7 +6535,7 @@ contains_aligned_value_p (tree type)
tree field;
/* Walk all the structure fields. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL
&& contains_aligned_value_p (TREE_TYPE (field)))
@@ -6435,7 +6561,7 @@ contains_aligned_value_p (tree type)
specified mode and type. */
int
-ix86_function_arg_boundary (enum machine_mode mode, tree type)
+ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
int align;
if (type)
@@ -6750,12 +6876,12 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
}
/* Return false iff TYPE is returned in memory. This version is used
- on Solaris 10. It is similar to the generic ix86_return_in_memory,
+ on Solaris 2. It is similar to the generic ix86_return_in_memory,
but differs notably in that when MMX is available, 8-byte vectors
are returned in memory, rather than in MMX registers. */
bool
-ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size;
enum machine_mode mode = type_natural_mode (type, NULL);
@@ -6847,7 +6973,7 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi)
if (!TARGET_64BIT || abi == MS_ABI)
return build_pointer_type (char_type_node);
- record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ record = lang_hooks.types.make_type (RECORD_TYPE);
type_decl = build_decl (BUILTINS_LOCATION,
TYPE_DECL, get_identifier ("__va_list_tag"), record);
@@ -6875,9 +7001,9 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi)
TREE_CHAIN (record) = type_decl;
TYPE_NAME (record) = type_decl;
TYPE_FIELDS (record) = f_gpr;
- TREE_CHAIN (f_gpr) = f_fpr;
- TREE_CHAIN (f_fpr) = f_ovf;
- TREE_CHAIN (f_ovf) = f_sav;
+ DECL_CHAIN (f_gpr) = f_fpr;
+ DECL_CHAIN (f_fpr) = f_ovf;
+ DECL_CHAIN (f_ovf) = f_sav;
layout_type (record);
@@ -6936,11 +7062,8 @@ static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
rtx save_area, mem;
- rtx label;
- rtx tmp_reg;
- rtx nsse_reg;
alias_set_type set;
- int i;
+ int i, max;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
@@ -6950,7 +7073,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* FPR size of varargs save area. We don't need it if we don't pass
anything in SSE registers. */
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (TARGET_SSE && cfun->va_list_fpr_size)
ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
else
ix86_varargs_fpr_size = 0;
@@ -6961,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
- for (i = cum->regno;
- i < X86_64_REGPARM_MAX
- && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
- i++)
+ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ if (max > X86_64_REGPARM_MAX)
+ max = X86_64_REGPARM_MAX;
+
+ for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (Pmode,
plus_constant (save_area, i * UNITS_PER_WORD));
@@ -6976,33 +7100,42 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (ix86_varargs_fpr_size)
{
+ enum machine_mode smode;
+ rtx label, test;
+
/* Now emit code to save SSE registers. The AX parameter contains number
- of SSE parameter registers used to call this function. We use
- sse_prologue_save insn template that produces computed jump across
- SSE saves. We need some preparation work to get this working. */
+ of SSE parameter registers used to call this function, though all we
+ actually check here is the zero/non-zero status. */
label = gen_label_rtx ();
+ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+ label));
- nsse_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-
- /* Compute address of memory block we save into. We always use pointer
- pointing 127 bytes after first byte to store - this is needed to keep
- instruction size limited by 4 bytes (5 bytes for AVX) with one
- byte displacement. */
- tmp_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- plus_constant (save_area,
- ix86_varargs_gpr_size + 127)));
- mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
- MEM_NOTRAP_P (mem) = 1;
- set_mem_alias_set (mem, set);
- set_mem_align (mem, 64);
+ /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
+ we used movdqa (i.e. TImode) instead? Perhaps even better would
+ be if we could determine the real mode of the data, via a hook
+ into pass_stdarg. Ignore all that for now. */
+ smode = V4SFmode;
+ if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
+ crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
+
+ max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+ if (max > X86_64_SSE_REGPARM_MAX)
+ max = X86_64_SSE_REGPARM_MAX;
+
+ for (i = cum->sse_regno; i < max; ++i)
+ {
+ mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+ mem = gen_rtx_MEM (smode, mem);
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
- /* And finally do the dirty job! */
- emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label,
- gen_reg_rtx (Pmode)));
+ emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+ }
+
+ emit_label (label);
}
}
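At the source level, the code above is what stands behind va_arg on SysV x86-64: the GP loop spills the incoming integer registers, and the SSE block, guarded by the %al test, spills %xmm0-%xmm7. A usage sketch (ordinary C, nothing patch-specific assumed):

#include <stdarg.h>

/* Each double is fetched from the SSE register save area filled in by
   the prologue code above; callers set %al to the number of vector
   registers used, and the prologue skips the spills when it is zero.  */
double
sum (int n, ...)
{
  va_list ap;
  double total = 0.0;
  va_start (ap, n);
  while (n-- > 0)
    total += va_arg (ap, double);
  va_end (ap);
  return total;
}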
@@ -7089,15 +7222,21 @@ ix86_va_start (tree valist, rtx nextarg)
}
f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
-
- valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_simple_mem_ref (valist);
+ TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
+ /* The following should be folded into the MEM_REF offset. */
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
+ f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
+ f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
+ f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
+ f_sav, NULL_TREE);
/* Count number of gp and fp argument registers used. */
words = crtl->args.info.words;
@@ -7170,9 +7309,9 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
@@ -7411,8 +7550,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
/* Care for on-stack alignment if needed. */
- if (arg_boundary <= 64
- || integer_zerop (TYPE_SIZE (type)))
+ if (arg_boundary <= 64 || size == 0)
t = ovf;
else
{
@@ -7423,9 +7561,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
- if (crtl->stack_alignment_needed < arg_boundary)
- crtl->stack_alignment_needed = arg_boundary;
}
+
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
@@ -7820,7 +7957,7 @@ ix86_code_end (void)
assemble_name (asm_out_file, name);
fputs ("\n\t.private_extern\t", asm_out_file);
assemble_name (asm_out_file, name);
- fputs ("\n", asm_out_file);
+ putc ('\n', asm_out_file);
ASM_OUTPUT_LABEL (asm_out_file, name);
DECL_WEAK (decl) = 1;
}
@@ -7830,10 +7967,10 @@ ix86_code_end (void)
{
DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
- (*targetm.asm_out.unique_section) (decl, 0);
+ targetm.asm_out.unique_section (decl, 0);
switch_to_section (get_named_section (decl, NULL, 0));
- (*targetm.asm_out.globalize_label) (asm_out_file, name);
+ targetm.asm_out.globalize_label (asm_out_file, name);
fputs ("\t.hidden\t", asm_out_file);
assemble_name (asm_out_file, name);
putc ('\n', asm_out_file);
@@ -7924,8 +8061,8 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif
- (*targetm.asm_out.internal_label) (asm_out_file, "L",
- CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
{
@@ -8223,6 +8360,11 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
else
frame->save_regs_using_mov = false;
+ /* If static stack checking is enabled and done with probes, the registers
+ need to be saved before allocating the frame. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ frame->save_regs_using_mov = false;
+
/* Skip return address. */
offset = UNITS_PER_WORD;
@@ -8434,9 +8576,9 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
rtx insn;
if (! TARGET_64BIT)
- insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
else if (x86_64_immediate_operand (offset, DImode))
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
else
{
rtx tmp;
@@ -8453,8 +8595,8 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
if (style < 0)
RTX_FRAME_RELATED_P (insn) = 1;
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, tmp,
- offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
+ offset));
}
if (style >= 0)
@@ -8632,6 +8774,377 @@ ix86_internal_arg_pointer (void)
return virtual_incoming_args_rtx;
}
+struct scratch_reg {
+ rtx reg;
+ bool saved;
+};
+
+/* Return a short-lived scratch register for use on function entry.
+ In 32-bit mode, it is valid only after the registers are saved
+ in the prologue. This register must be released by means of
+ release_scratch_register_on_entry once it is dead. */
+
+static void
+get_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ int regno;
+
+ sr->saved = false;
+
+ if (TARGET_64BIT)
+ {
+ /* We always use R11 in 64-bit mode. */
+ regno = R11_REG;
+ }
+ else
+ {
+ tree decl = current_function_decl, fntype = TREE_TYPE (decl);
+ bool fastcall_p
+ = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+ bool static_chain_p = DECL_STATIC_CHAIN (decl);
+ int regparm = ix86_function_regparm (fntype, decl);
+ int drap_regno
+ = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
+
+ /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
+ for the static chain register. */
+ if ((regparm < 1 || (fastcall_p && !static_chain_p))
+ && drap_regno != AX_REG)
+ regno = AX_REG;
+ else if (regparm < 2 && drap_regno != DX_REG)
+ regno = DX_REG;
+ /* ecx is the static chain register. */
+ else if (regparm < 3 && !fastcall_p && !static_chain_p
+ && drap_regno != CX_REG)
+ regno = CX_REG;
+ else if (ix86_save_reg (BX_REG, true))
+ regno = BX_REG;
+ /* esi is the static chain register. */
+ else if (!(regparm == 3 && static_chain_p)
+ && ix86_save_reg (SI_REG, true))
+ regno = SI_REG;
+ else if (ix86_save_reg (DI_REG, true))
+ regno = DI_REG;
+ else
+ {
+ regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
+ sr->saved = true;
+ }
+ }
+
+ sr->reg = gen_rtx_REG (Pmode, regno);
+ if (sr->saved)
+ {
+ rtx insn = emit_insn (gen_push (sr->reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Release a scratch register obtained from the preceding function. */
+
+static void
+release_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ if (sr->saved)
+ {
+ rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
+
+ /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
+ RTX_FRAME_RELATED_P (insn) = 1;
+ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
+ }
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
+
+static void
+ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
+{
+ /* We skip the probe for the first interval + a small dope of 4 words and
+ probe that many bytes past the specified size to maintain a protection
+   area at the bottom of the stack.  */
+ const int dope = 4 * UNITS_PER_WORD;
+ rtx size_rtx = GEN_INT (size);
+
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 11 insns in the
+ generic case while the compile-time loop is made up of 3+2*(n-1) insns
+ for n # of intervals. */
+ if (size <= 5 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i, adjust;
+ bool first_probe = true;
+
+ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it exceeds SIZE. If only one probe is
+ needed, this will not generate any code. Then adjust and probe
+ to PROBE_INTERVAL + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ {
+ if (first_probe)
+ {
+ adjust = 2 * PROBE_INTERVAL + dope;
+ first_probe = false;
+ }
+ else
+ adjust = PROBE_INTERVAL;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ if (first_probe)
+ adjust = size + PROBE_INTERVAL + dope;
+ else
+ adjust = size + PROBE_INTERVAL - i;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* SP = SP_0 + PROBE_INTERVAL. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ - (PROBE_INTERVAL + dope))));
+
+ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
+ emit_move_insn (sr.reg, GEN_INT (-rounded_size));
+ emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
+ gen_rtx_PLUS (Pmode, sr.reg,
+ stack_pointer_rtx)));
+
+
+ /* Step 3: the loop
+
+ while (SP != LAST_ADDR)
+ {
+ SP = SP + PROBE_INTERVAL
+ probe at SP
+ }
+
+ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
+
+
+ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
+ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ rounded_size - size)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
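In rough C terms, the moving-SP sequence emitted above behaves like the sketch below (PROBE_INTERVAL is assumed to be 4096, i.e. STACK_CHECK_PROBE_INTERVAL_EXP == 12; pointer arithmetic stands in for the RTL):

/* Sketch of ix86_adjust_stack_and_probe for a large SIZE.  */
void
adjust_and_probe_sketch (volatile char **sp, long size)
{
  const long interval = 4096;               /* assumed PROBE_INTERVAL */
  const long dope = 4 * sizeof (void *);    /* the 4-word dope */
  long rounded = size & -interval;
  volatile char *last;

  *sp -= interval + dope;                   /* skip the first interval */
  last = *sp - rounded;
  while (*sp != last)                       /* equality test: wrap-safe */
    {
      *sp -= interval;
      **sp = 0;                             /* the probe */
    }
  if (size != rounded)
    {
      *sp -= size - rounded;
      **sp = 0;                             /* probe the tail */
    }
  *sp += interval + dope;                   /* hand back the extra */
}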
+
+/* Adjust the stack pointer up to REG while probing it. */
+
+const char *
+output_adjust_stack_and_probe (rtx reg)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if SP == LAST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* SP = SP + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at SP. */
+ xops[1] = const0_rtx;
+ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
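For the 32-bit case the template assembles into a loop of this shape (AT&T syntax; %eax as the scratch register is an assumption, it is whatever get_scratch_register_on_entry picked):

/* Sketch of the emitted loop, as a comment:

   .LPSRL0:
       cmpl   %eax, %esp        done once SP has reached LAST_ADDR
       je     .LPSRE0
       subl   $4096, %esp       SP -= PROBE_INTERVAL
       orl    $0, (%esp)        probe the freshly exposed page
       jmp    .LPSRL0
   .LPSRE0:                                                          */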
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer. */
+
+static void
+ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 7 insns in the
+ generic case while the compile-time loop is made up of n insns for n #
+ of intervals. */
+ if (size <= 7 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i;
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+ it exceeds SIZE. If only one probe is needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
+
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size, last;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_OFFSET = FIRST. */
+ emit_move_insn (sr.reg, GEN_INT (-first));
+
+ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
+ last = first + rounded_size;
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ sr.reg),
+ rounded_size - size));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG to END, inclusive. These are
+ offsets from the current stack pointer. */
+
+const char *
+output_probe_stack_range (rtx reg, rtx end)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[3];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
+ xops[0] = reg;
+ xops[1] = end;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at TEST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ xops[2] = const0_rtx;
+ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
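This variant leaves the stack pointer alone and walks a negated offset instead; with hypothetical registers %eax (TEST) and %edx (LAST) it emits roughly:

/* Sketch of the emitted loop, as a comment:

   .LPSRL1:
       cmpl   %edx, %eax        stop when TEST_ADDR == LAST_ADDR
       je     .LPSRE1
       subl   $4096, %eax       advance the (negated) offset one interval
       orl    $0, (%esp,%eax)   probe at SP + TEST_OFFSET
       jmp    .LPSRL1
   .LPSRE1:                                                          */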
+
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
to be generated in correct form. */
static void
@@ -8682,12 +9195,12 @@ ix86_expand_prologue (void)
ix86_compute_frame_layout (&frame);
- if (ix86_function_ms_hook_prologue (current_function_decl))
+ if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
{
rtx push, mov;
/* Make sure the function starts with
- 8b ff movl.s %edi,%edi
+	 8b ff	movl.s %edi,%edi (emitted by ix86_asm_output_function_label)
55 push %ebp
8b ec movl.s %esp,%ebp
@@ -8695,8 +9208,6 @@ ix86_expand_prologue (void)
functions in Microsoft Windows XP Service Pack 2 and newer.
Wine uses this to enable Windows apps to hook the Win32 API
functions provided by Wine. */
- insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
- gen_rtx_REG (SImode, DI_REG)));
push = emit_insn (gen_push (hard_frame_pointer_rtx));
mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
stack_pointer_rtx));
@@ -8719,7 +9230,7 @@ ix86_expand_prologue (void)
the base pointer again, align the stack, and later regenerate
the frame pointer setup. The frame pointer generated by the
hook prologue is not aligned, so it can't be used. */
- insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+ insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
}
/* The first insn of a function that accepts its static chain on the
@@ -8774,9 +9285,9 @@ ix86_expand_prologue (void)
ix86_cfa_state->reg = crtl->drap_reg;
/* Align the stack. */
- insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-align_bytes)));
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
RTX_FRAME_RELATED_P (insn) = 1;
/* Replicate the return address on the stack so that return
@@ -8811,9 +9322,9 @@ ix86_expand_prologue (void)
gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
/* Align the stack. */
- insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-align_bytes)));
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -8824,6 +9335,32 @@ ix86_expand_prologue (void)
else
allocate += frame.nregs * UNITS_PER_WORD;
+ /* The stack has already been decremented by the instruction calling us
+ so we need to probe unconditionally to preserve the protection area. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ {
+ /* We expect the registers to be saved when probes are used. */
+ gcc_assert (!frame.save_regs_using_mov);
+
+ if (STACK_CHECK_MOVING_SP)
+ {
+ ix86_adjust_stack_and_probe (allocate);
+ allocate = 0;
+ }
+ else
+ {
+ HOST_WIDE_INT size = allocate;
+
+ if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
+ size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+
+ if (TARGET_STACK_PROBE)
+ ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ else
+ ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ }
+ }
+
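A small test case that exercises this path (a sketch; with -fstack-check, flag_stack_check becomes STATIC_BUILTIN_STACK_CHECK on targets that define STACK_CHECK_STATIC_BUILTIN, which is the assumption here):

/* big-frame.c -- compile with: gcc -S -fstack-check big-frame.c
   The frame is far larger than one probe interval, so the prologue
   must touch every page between the old and new stack pointer.  */
void consume (char *);

void
big_frame (void)
{
  char buf[1 << 20];            /* 1 MB of locals */
  consume (buf);
}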
/* When using red zone we may start register saving before allocating
the stack frame saving one cycle of the prologue. However I will
avoid doing this if I am going to have to probe the stack since
@@ -8839,7 +9376,7 @@ ix86_expand_prologue (void)
if (allocate == 0)
;
- else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
+ else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-allocate), -1,
ix86_cfa_state->reg == stack_pointer_rtx);
@@ -8862,11 +9399,7 @@ ix86_expand_prologue (void)
emit_move_insn (eax, GEN_INT (allocate));
- if (TARGET_64BIT)
- insn = gen_allocate_stack_worker_64 (eax, eax);
- else
- insn = gen_allocate_stack_worker_32 (eax, eax);
- insn = emit_insn (insn);
+ insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
if (ix86_cfa_state->reg == stack_pointer_rtx)
{
@@ -10985,7 +11518,7 @@ output_pic_addr_const (FILE *file, rtx x, int code)
break;
case SYMBOL_REF:
- if (! TARGET_MACHO || TARGET_64BIT)
+ if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
output_addr_const (file, x);
else
{
@@ -12127,8 +12660,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
case ';':
-#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX
- fputs (";", file);
+#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
+ putc (';', file);
#endif
return;
@@ -19291,7 +19824,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
QImode, 1, end_0_label);
/* Increment the address. */
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
@@ -19301,7 +19834,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
emit_label (align_3_label);
}
@@ -19309,7 +19842,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
@@ -19319,7 +19852,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
mem = change_address (src, SImode, out);
emit_move_insn (scratch, mem);
- emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
+ emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
@@ -19374,7 +19907,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const2_rtx));
emit_label (end_2_label);
@@ -19385,7 +19918,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
- emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
+ emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
emit_label (end_0_label);
}
@@ -19427,7 +19960,7 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
/* strlensi_unroll_1 returns the address of the zero at the end of
the string, like memchr(), so compute the length by subtracting
the start address. */
- emit_insn ((*ix86_gen_sub3) (out, out, addr));
+ emit_insn (ix86_gen_sub3 (out, out, addr));
}
else
{
@@ -19450,8 +19983,8 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
scratch4), UNSPEC_SCAS);
emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
- emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
- emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
+ emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
+ emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
}
return 1;
}
@@ -21623,6 +22156,27 @@ enum ix86_builtins
IX86_BUILTIN_CLZS,
+ /* FSGSBASE instructions. */
+ IX86_BUILTIN_RDFSBASE32,
+ IX86_BUILTIN_RDFSBASE64,
+ IX86_BUILTIN_RDGSBASE32,
+ IX86_BUILTIN_RDGSBASE64,
+ IX86_BUILTIN_WRFSBASE32,
+ IX86_BUILTIN_WRFSBASE64,
+ IX86_BUILTIN_WRGSBASE32,
+ IX86_BUILTIN_WRGSBASE64,
+
+ /* RDRND instructions. */
+ IX86_BUILTIN_RDRAND16,
+ IX86_BUILTIN_RDRAND32,
+ IX86_BUILTIN_RDRAND64,
+
+ /* F16C instructions. */
+ IX86_BUILTIN_CVTPH2PS,
+ IX86_BUILTIN_CVTPH2PS256,
+ IX86_BUILTIN_CVTPS2PH,
+ IX86_BUILTIN_CVTPS2PH256,
+
IX86_BUILTIN_MAX
};
@@ -21897,6 +22451,20 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+ /* FSGSBASE */
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+
+ /* RDRND */
+ { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
+ { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
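Paired with the IX86_BUILTIN_* enum values added earlier, these rows register the builtins under the names in the table; a usage sketch calling them directly (real code would normally go through the immintrin.h wrappers, which are not shown in this hunk):

/* Requires -mfsgsbase -mrdrnd on an x86-64 target.  */
unsigned long long
read_fs_base (void)
{
  return __builtin_ia32_rdfsbase64 ();   /* UINT64_FTYPE_VOID */
}

unsigned int
hardware_random (void)
{
  return __builtin_ia32_rdrand32 ();     /* UNSIGNED_FTYPE_VOID */
}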
/* Builtins with variable number of arguments. */
@@ -22523,6 +23091,12 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+
+ /* F16C */
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
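The four F16C entries wire half-precision conversion up to the vcvtph2ps/vcvtps2ph patterns in sse.md; a round-trip sketch using the 128-bit forms (the vector typedefs are local assumptions, and immediate 0 selects round-to-nearest):

/* Requires -mf16c.  */
typedef float v4sf __attribute__ ((vector_size (16)));
typedef short v8hi __attribute__ ((vector_size (16)));

v4sf
half_round_trip (v4sf x)
{
  v8hi h = __builtin_ia32_vcvtps2ph (x, 0);  /* float -> half, imm8 = 0 */
  return __builtin_ia32_vcvtph2ps (h);       /* half -> float */
}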
/* FMA4 and XOP. */
@@ -23022,13 +23596,13 @@ ix86_init_builtin_types (void)
TYPE_PRECISION (float80_type_node) = 80;
layout_type (float80_type_node);
}
- (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");
+ lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
/* The __float128 type. */
float128_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (float128_type_node) = 128;
layout_type (float128_type_node);
- (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");
+ lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
/* This macro is built by i386-builtin-types.awk. */
DEFINE_BUILTIN_PRIMITIVE_TYPES;
@@ -23110,7 +23684,7 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if (GET_MODE (op1) == SImode && mode1 == TImode)
@@ -23120,9 +23694,9 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
op1 = gen_lowpart (TImode, x);
}
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ if (!insn_data[icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ if (!insn_data[icode].operand[2].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (target, op0, op1);
@@ -23240,7 +23814,7 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
gcc_assert (nargs <= 4);
@@ -23273,7 +23847,7 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
if (optimize
- || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
+ || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
|| num_memory > 1)
op = force_reg (mode, op);
}
@@ -23338,18 +23912,18 @@ ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ || !insn_data[icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op1 = op0;
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
+ if (!insn_data[icode].operand[2].predicate (op1, mode0))
op1 = copy_to_mode_reg (mode0, op1);
pat = GEN_FCN (icode) (target, op0, op1);
@@ -23393,14 +23967,14 @@ ix86_expand_sse_compare (const struct builtin_description *d,
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[2].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
@@ -23445,10 +24019,10 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
target = gen_rtx_SUBREG (QImode, target, 0);
if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (d->icode) (op0, op1);
@@ -23489,10 +24063,10 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
target = gen_rtx_SUBREG (QImode, target, 0);
if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (d->icode) (op0, op1);
@@ -23541,17 +24115,17 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d,
if (VECTOR_MODE_P (modev4))
op2 = safe_vector_operand (op2, modev4);
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
- if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
op1 = copy_to_mode_reg (modei3, op1);
if ((optimize && !register_operand (op2, modev4))
- || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ || !insn_data[d->icode].operand[4].predicate (op2, modev4))
op2 = copy_to_mode_reg (modev4, op2);
- if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
op3 = copy_to_mode_reg (modei5, op3);
- if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
{
error ("the fifth argument must be a 8-bit immediate");
return const0_rtx;
@@ -23561,7 +24135,7 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d,
{
if (optimize || !target
|| GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
target = gen_reg_rtx (tmode0);
scratch1 = gen_reg_rtx (tmode1);
@@ -23572,7 +24146,7 @@ ix86_expand_sse_pcmpestr (const struct builtin_description *d,
{
if (optimize || !target
|| GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
target = gen_reg_rtx (tmode1);
scratch0 = gen_reg_rtx (tmode0);
@@ -23640,13 +24214,13 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d,
if (VECTOR_MODE_P (modev3))
op1 = safe_vector_operand (op1, modev3);
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
if ((optimize && !register_operand (op1, modev3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ || !insn_data[d->icode].operand[3].predicate (op1, modev3))
op1 = copy_to_mode_reg (modev3, op1);
- if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
{
error ("the third argument must be a 8-bit immediate");
return const0_rtx;
@@ -23656,7 +24230,7 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d,
{
if (optimize || !target
|| GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
target = gen_reg_rtx (tmode0);
scratch1 = gen_reg_rtx (tmode1);
@@ -23667,7 +24241,7 @@ ix86_expand_sse_pcmpistr (const struct builtin_description *d,
{
if (optimize || !target
|| GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
target = gen_reg_rtx (tmode1);
scratch0 = gen_reg_rtx (tmode0);
@@ -23763,6 +24337,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8SF:
case V8SF_FTYPE_V8SI:
case V8SF_FTYPE_V4SF:
+ case V8SF_FTYPE_V8HI:
case V4SI_FTYPE_V4SI:
case V4SI_FTYPE_V16QI:
case V4SI_FTYPE_V4SF:
@@ -23779,6 +24354,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SI:
case V4SF_FTYPE_V8SF:
case V4SF_FTYPE_V4DF:
+ case V4SF_FTYPE_V8HI:
case V4SF_FTYPE_V2DF:
case V2DI_FTYPE_V2DI:
case V2DI_FTYPE_V16QI:
@@ -23881,6 +24457,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs_constant = 1;
break;
case V8HI_FTYPE_V8HI_INT:
+ case V8HI_FTYPE_V8SF_INT:
+ case V8HI_FTYPE_V4SF_INT:
case V8SF_FTYPE_V8SF_INT:
case V4SI_FTYPE_V4SI_INT:
case V4SI_FTYPE_V8SI_INT:
@@ -23958,7 +24536,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
if (optimize
|| target == 0
|| GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
+ || !insn_p->operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
real_target = target;
}
@@ -23973,7 +24551,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
tree arg = CALL_EXPR_ARG (exp, i);
rtx op = expand_normal (arg);
enum machine_mode mode = insn_p->operand[i + 1].mode;
- bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
+ bool match = insn_p->operand[i + 1].predicate (op, mode);
if (last_arg_count && (i + 1) == nargs)
{
@@ -23983,7 +24561,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
if (!match)
{
op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
- if (!(*insn_p->operand[i + 1].predicate) (op, mode))
+ if (!insn_p->operand[i + 1].predicate (op, mode))
op = copy_to_reg (op);
}
}
@@ -24128,7 +24706,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_VOID:
emit_insn (GEN_FCN (icode) (target));
return 0;
+ case VOID_FTYPE_UINT64:
+ case VOID_FTYPE_UNSIGNED:
+ nargs = 0;
+ klass = store;
+ memory = 0;
+ break;
case UINT64_FTYPE_VOID:
+ case UNSIGNED_FTYPE_VOID:
+ case UINT16_FTYPE_VOID:
nargs = 0;
klass = load;
memory = 0;
@@ -24207,7 +24794,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
arg = CALL_EXPR_ARG (exp, 0);
op = expand_normal (arg);
gcc_assert (target == 0);
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ if (memory)
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ else
+ target = force_reg (tmode, op);
arg_adjust = 1;
}
else
@@ -24216,7 +24806,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
if (optimize
|| target == 0
|| GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
+ || !insn_p->operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
}
@@ -24227,7 +24817,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
arg = CALL_EXPR_ARG (exp, i + arg_adjust);
op = expand_normal (arg);
- match = (*insn_p->operand[i + 1].predicate) (op, mode);
+ match = insn_p->operand[i + 1].predicate (op, mode);
if (last_arg_constant && (i + 1) == nargs)
{
@@ -24477,11 +25067,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
op0 = force_reg (Pmode, op0);
op0 = gen_rtx_MEM (mode1, op0);
- if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
+ if (!insn_data[icode].operand[2].predicate (op2, mode2))
op2 = copy_to_mode_reg (mode2, op2);
pat = GEN_FCN (icode) (op0, op1, op2);
if (! pat)
@@ -24505,7 +25095,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
icode = CODE_FOR_sse2_clflush;
- if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
op0 = copy_to_mode_reg (Pmode, op0);
emit_insn (gen_sse2_clflush (op0));
@@ -24524,7 +25114,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
+ emit_insn (ix86_gen_monitor (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
@@ -24598,7 +25188,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
icode = CODE_FOR_lwp_llwpcb;
- if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
op0 = copy_to_mode_reg (Pmode, op0);
emit_insn (gen_lwp_llwpcb (op0));
return 0;
@@ -24606,7 +25196,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_SLWPCB:
icode = CODE_FOR_lwp_slwpcb;
if (!target
- || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
+ || !insn_data[icode].operand[0].predicate (target, Pmode))
target = gen_reg_rtx (Pmode);
emit_insn (gen_lwp_slwpcb (target));
return target;
@@ -24727,8 +25317,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
/* Dispatch to a handler for a vectorization library. */
if (ix86_veclib_handler)
- return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
- type_in);
+ return ix86_veclib_handler ((enum built_in_function) fn, type_out,
+ type_in);
return NULL_TREE;
}
@@ -26235,7 +26825,7 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub)
gcc_assert (!TARGET_64BIT);
/* Lose our funky encoding stuff so it doesn't contaminate the stub. */
- symb = (*targetm.strip_name_encoding) (symb);
+ symb = targetm.strip_name_encoding (symb);
length = strlen (stub);
binder_name = XALLOCAVEC (char, length + 32);
@@ -26425,15 +27015,9 @@ ix86_handle_fndecl_attribute (tree *node, tree name,
return NULL_TREE;
}
- if (TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
- name);
- return NULL_TREE;
- }
-
#ifndef HAVE_AS_IX86_SWAP
- sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
+ if (!TARGET_64BIT)
+ sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
#endif
return NULL_TREE;
@@ -26520,7 +27104,7 @@ x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
return false;
/* Need a free register for GOT references. */
- if (flag_pic && !(*targetm.binds_local_p) (function))
+ if (flag_pic && !targetm.binds_local_p (function))
return false;
/* Otherwise ok. */
@@ -26630,7 +27214,7 @@ x86_output_mi_thunk (FILE *file,
xops[0] = XEXP (DECL_RTL (function), 0);
if (TARGET_64BIT)
{
- if (!flag_pic || (*targetm.binds_local_p) (function))
+ if (!flag_pic || targetm.binds_local_p (function))
output_asm_insn ("jmp\t%P0", xops);
/* All thunks should be in the same object as their target,
and thus binds_local_p should be true. */
@@ -26647,17 +27231,18 @@ x86_output_mi_thunk (FILE *file,
}
else
{
- if (!flag_pic || (*targetm.binds_local_p) (function))
+ if (!flag_pic || targetm.binds_local_p (function))
output_asm_insn ("jmp\t%P0", xops);
else
#if TARGET_MACHO
if (TARGET_MACHO)
{
rtx sym_ref = XEXP (DECL_RTL (function), 0);
- tmp = (gen_rtx_SYMBOL_REF
+ if (TARGET_MACHO_BRANCH_ISLANDS)
+ sym_ref = (gen_rtx_SYMBOL_REF
(Pmode,
machopic_indirection_name (sym_ref, /*stub_p=*/true)));
- tmp = gen_rtx_MEM (QImode, tmp);
+ tmp = gen_rtx_MEM (QImode, sym_ref);
xops[0] = tmp;
output_asm_insn ("jmp\t%0", xops);
}
@@ -27701,10 +28286,10 @@ ix86_expand_vector_init_interleave (enum machine_mode mode,
emit_move_insn (op0, gen_lowpart (mode, op1));
 /* Load even elements into the second position.  */
- emit_insn ((*gen_load_even) (op0,
- force_reg (inner_mode,
- ops [i + i + 1]),
- const1_rtx));
+ emit_insn (gen_load_even (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
+ const1_rtx));
/* Cast vector to FIRST_IMODE vector. */
ops[i] = gen_reg_rtx (first_imode);
@@ -27715,7 +28300,7 @@ ix86_expand_vector_init_interleave (enum machine_mode mode,
for (i = j = 0; i < n; i += 2, j++)
{
op0 = gen_reg_rtx (first_imode);
- emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+ emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
/* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
ops[j] = gen_reg_rtx (second_imode);
@@ -27729,8 +28314,8 @@ ix86_expand_vector_init_interleave (enum machine_mode mode,
for (i = j = 0; i < n / 2; i += 2, j++)
{
op0 = gen_reg_rtx (second_imode);
- emit_insn ((*gen_interleave_second_low) (op0, ops[i],
- ops[i + 1]));
+ emit_insn (gen_interleave_second_low (op0, ops[i],
+ ops[i + 1]));
/* Cast the SECOND_IMODE vector to the THIRD_IMODE
vector. */
@@ -27743,8 +28328,8 @@ ix86_expand_vector_init_interleave (enum machine_mode mode,
case V2DImode:
op0 = gen_reg_rtx (second_imode);
- emit_insn ((*gen_interleave_second_low) (op0, ops[0],
- ops[1]));
+ emit_insn (gen_interleave_second_low (op0, ops[0],
+ ops[1]));
/* Cast the SECOND_IMODE vector back to a vector on original
mode. */
@@ -28175,13 +28760,13 @@ half:
/* Extract the half. */
tmp = gen_reg_rtx (half_mode);
- emit_insn ((*gen_extract[j][i]) (tmp, target));
+ emit_insn (gen_extract[j][i] (tmp, target));
/* Put val in tmp at elt. */
ix86_expand_vector_set (false, tmp, val, elt);
/* Put it back. */
- emit_insn ((*gen_insert[j][i]) (target, target, tmp));
+ emit_insn (gen_insert[j][i] (target, target, tmp));
return;
default:
@@ -29370,7 +29955,9 @@ static const struct attribute_spec ix86_attribute_table[] =
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
@@ -29399,6 +29986,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
return ix86_cost->scalar_to_vec_cost;
case unaligned_load:
+ case unaligned_store:
return ix86_cost->vec_unalign_load_cost;
case cond_branch_taken:
@@ -30619,10 +31207,12 @@ ix86_canonical_va_list_type (tree type)
tree wtype, htype;
/* Resolve references and pointers to va_list type. */
- if (INDIRECT_REF_P (type))
+ if (TREE_CODE (type) == MEM_REF)
type = TREE_TYPE (type);
else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
type = TREE_TYPE (type);
+ else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
if (TARGET_64BIT)
{
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index afe05f9f6db..e153920a5c1 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -66,6 +66,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AES OPTION_ISA_AES
#define TARGET_PCLMUL OPTION_ISA_PCLMUL
#define TARGET_CMPXCHG16B OPTION_ISA_CX16
+#define TARGET_FSGSBASE OPTION_ISA_FSGSBASE
+#define TARGET_RDRND OPTION_ISA_RDRND
+#define TARGET_F16C OPTION_ISA_F16C
/* SSE4.1 defines round instructions */
@@ -474,7 +477,13 @@ extern tree x86_mfence;
redefines this to 1. */
#define TARGET_MACHO 0
-/* Likewise, for the Windows 64-bit ABI. */
+/* Branch island 'stubs' are emitted for earlier versions of Darwin.
+   This provides a default (overridden in darwin.h).  */
+#ifndef TARGET_MACHO_BRANCH_ISLANDS
+#define TARGET_MACHO_BRANCH_ISLANDS 0
+#endif
+
+/* For the Windows 64-bit ABI. */
#define TARGET_64BIT_MS_ABI (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
/* Available call abi. */
@@ -1812,10 +1821,11 @@ typedef struct ix86_args {
#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
-/* Define if shifts truncate the shift count
- which implies one can omit a sign-extension or zero-extension
- of a shift count. */
-/* On i386, shifts do truncate the count. But bit opcodes don't. */
+/* Define if shifts truncate the shift count which implies one can
+ omit a sign-extension or zero-extension of a shift count.
+
+ On i386, shifts do truncate the count. But bit test instructions
+ take the modulo of the bit offset operand. */
/* #define SHIFT_COUNT_TRUNCATED */
@@ -2079,6 +2089,13 @@ do { \
}
#endif
+/* Write the extra assembler code needed to declare a function
+ properly. */
+
+#undef ASM_OUTPUT_FUNCTION_LABEL
+#define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \
+ ix86_asm_output_function_label (FILE, NAME, DECL)
+
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. This is the case for x86_64 on Mach-O for example. */
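
[Editor's note: the revised SHIFT_COUNT_TRUNCATED comment above deserves a
concrete illustration.  A minimal C sketch, added here for illustration only
and not part of the patch:

  /* On x86, shift instructions use the count modulo the operand width,
     so the explicit mask below typically costs no extra instruction.
     GCC still cannot define SHIFT_COUNT_TRUNCATED, because bit-test
     instructions (bt/bts/btr) do not treat their offset operand the
     same way.  */
  unsigned int
  shift_masked (unsigned int x, unsigned int n)
  {
    return x << (n & 31);
  }
]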
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e361fd707ab..6616da2e729 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -79,13 +79,11 @@
;; Prologue support
UNSPEC_STACK_ALLOC
UNSPEC_SET_GOT
- UNSPEC_SSE_PROLOGUE_SAVE
UNSPEC_REG_SAVE
UNSPEC_DEF_CFA
UNSPEC_SET_RIP
UNSPEC_SET_GOT_OFFSET
UNSPEC_MEMORY_BLOCKAGE
- UNSPEC_SSE_PROLOGUE_SAVE_LOW
;; TLS support
UNSPEC_TP
@@ -226,11 +224,14 @@
UNSPEC_MASKSTORE
UNSPEC_CAST
UNSPEC_VTESTP
+ UNSPEC_VCVTPH2PS
+ UNSPEC_VCVTPS2PH
])
(define_c_enum "unspecv" [
UNSPECV_BLOCKAGE
UNSPECV_STACK_PROBE
+ UNSPECV_PROBE_STACK_RANGE
UNSPECV_EMMS
UNSPECV_LDMXCSR
UNSPECV_STMXCSR
@@ -254,6 +255,11 @@
UNSPECV_SLWP_INTRINSIC
UNSPECV_LWPVAL_INTRINSIC
UNSPECV_LWPINS_INTRINSIC
+ UNSPECV_RDFSBASE
+ UNSPECV_RDGSBASE
+ UNSPECV_WRFSBASE
+ UNSPECV_WRGSBASE
+ UNSPECV_RDRAND
])
;; Constants to represent pcomtrue/pcomfalse variants
@@ -760,8 +766,6 @@
;; Used in signed and unsigned divisions.
(define_code_iterator any_div [div udiv])
-(define_code_attr extract_code
- [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
;; Instruction prefix for signed and unsigned operations.
(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -855,6 +859,13 @@
(SI "general_operand")
(DI "x86_64_szext_general_operand")])
+;; Immediate operand predicate for integer modes.
+(define_mode_attr immediate_operand
+ [(QI "immediate_operand")
+ (HI "immediate_operand")
+ (SI "immediate_operand")
+ (DI "x86_64_immediate_operand")])
+
;; Operand predicate for shifts.
(define_mode_attr shift_operand
[(QI "nonimmediate_operand")
@@ -6899,7 +6910,7 @@
(compare:CCC
(plus:SWI
(match_operand:SWI 1 "nonimmediate_operand" "%0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+ (match_operand:SWI 2 "<general_operand>" "<g>"))
(match_dup 1)))
(clobber (match_scratch:SWI 0 "=<r>"))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
@@ -7308,9 +7319,9 @@
;; Divmod instructions.
-(define_expand "<u>divmodqi4"
+(define_expand "divmodqi4"
[(parallel [(set (match_operand:QI 0 "register_operand" "")
- (any_div:QI
+ (div:QI
(match_operand:QI 1 "register_operand" "")
(match_operand:QI 2 "nonimmediate_operand" "")))
(set (match_operand:QI 3 "register_operand" "")
@@ -7326,36 +7337,58 @@
/* Extend operands[1] to HImode. Generate 8bit divide. Result is
in AX. */
- if (<extract_code> == SIGN_EXTRACT)
- {
- emit_insn (gen_extendqihi2 (tmp1, operands[1]));
- emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
+ emit_insn (gen_extendqihi2 (tmp1, operands[1]));
+ emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
- div = gen_rtx_DIV (QImode, operands[1], operands[2]);
- mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+ /* Extract remainder from AH. */
+ tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8));
+ insn = emit_move_insn (operands[3], tmp1);
- tmp1 = gen_rtx_<extract_code> (QImode, tmp0,
- GEN_INT (8), GEN_INT (8));
- }
- else
- {
- emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
- emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
+ mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ /* Extract quotient from AL. */
+ insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
- div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
- mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
+ div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, div);
- tmp1 = gen_rtx_<extract_code> (SImode, tmp0,
- GEN_INT (8), GEN_INT (8));
- tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0);
- }
+ DONE;
+})
+
+(define_expand "udivmodqi4"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (udiv:QI
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonimmediate_operand" "")))
+ (set (match_operand:QI 3 "register_operand" "")
+ (umod:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+{
+ rtx div, mod, insn;
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (HImode);
+ tmp1 = gen_reg_rtx (HImode);
+
+ /* Extend operands[1] to HImode. Generate 8bit divide. Result is
+ in AX. */
+ emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
/* Extract remainder from AH. */
+ tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
+ tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0);
insn = emit_move_insn (operands[3], tmp1);
+
+ mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
set_unique_reg_note (insn, REG_EQUAL, mod);
/* Extract quotient from AL. */
insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+ div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
set_unique_reg_note (insn, REG_EQUAL, div);
DONE;
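
[Editor's note: the rewritten divmodqi4/udivmodqi4 expanders above lean on
the x86 8-bit divide, which leaves the quotient in AL and the remainder in
AH after dividing AX by an 8-bit operand; the REG_EQUAL notes let later
passes see both results.  A hedged C illustration of the source-level
pattern this serves:

  /* Illustration only: with optimization, GCC can compute both results
     from a single 8-bit divide, extracting AL for the quotient and AH
     for the remainder, as the expanders above arrange.  */
  void
  divmod8 (unsigned char a, unsigned char b,
           unsigned char *q, unsigned char *r)
  {
    *q = a / b;
    *r = a % b;
  }
]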
@@ -7364,15 +7397,22 @@
;; Divide AX by r/m8, with result stored in
;; AL <- Quotient
;; AH <- Remainder
+;; Change div/mod to HImode and extend the second argument to HImode
+;; so that the mode of the div/mod matches the mode of its arguments.
+;; Otherwise combine may fail.
(define_insn "divmodhiqi3"
[(set (match_operand:HI 0 "register_operand" "=a")
(ior:HI
(ashift:HI
(zero_extend:HI
- (mod:QI (match_operand:HI 1 "register_operand" "0")
- (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (truncate:QI
+ (mod:HI (match_operand:HI 1 "register_operand" "0")
+ (sign_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))))
(const_int 8))
- (zero_extend:HI (div:QI (match_dup 1) (match_dup 2)))))
+ (zero_extend:HI
+ (truncate:QI
+ (div:HI (match_dup 1) (sign_extend:HI (match_dup 2)))))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_QIMODE_MATH"
"idiv{b}\t%2"
@@ -7384,10 +7424,14 @@
(ior:HI
(ashift:HI
(zero_extend:HI
- (umod:QI (match_operand:HI 1 "register_operand" "0")
- (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (truncate:QI
+ (mod:HI (match_operand:HI 1 "register_operand" "0")
+ (zero_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))))
(const_int 8))
- (zero_extend:HI (udiv:QI (match_dup 1) (match_dup 2)))))
+ (zero_extend:HI
+ (truncate:QI
+ (div:HI (match_dup 1) (zero_extend:HI (match_dup 2)))))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_QIMODE_MATH"
"div{b}\t%2"
@@ -10301,6 +10345,8 @@
(match_operand 3 "register_operand" ""))]
""
{
+ rtx (*gen_mov_insv_1) (rtx, rtx);
+
/* Handle insertions to %ah et al. */
if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
FAIL;
@@ -10310,11 +10356,10 @@
if (! ext_register_operand (operands[0], VOIDmode))
FAIL;
- if (TARGET_64BIT)
- emit_insn (gen_movdi_insv_1 (operands[0], operands[3]));
- else
- emit_insn (gen_movsi_insv_1 (operands[0], operands[3]));
+ gen_mov_insv_1 = (TARGET_64BIT
+ ? gen_movdi_insv_1 : gen_movsi_insv_1);
+ emit_insn (gen_mov_insv_1 (operands[0], operands[3]));
DONE;
})
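
[Editor's note: this hunk is the first of several in this merge (see the
gen_truncxf, gen_cmp, gen_allocate_stack_worker and gen_ior3 hunks below)
that replace duplicated emit_insn branches with a single call through a
generator function pointer.  A self-contained sketch of the idiom, with
hypothetical stand-ins (move64/move32) for generators such as
gen_movdi_insv_1 and gen_movsi_insv_1:

  /* Pick the worker once, call it once, instead of duplicating the
     call in each branch.  */
  typedef void (*move_fn) (long long *, long long);

  static void move64 (long long *d, long long s) { *d = s; }
  static void move32 (long long *d, long long s) { *d = (int) s; }

  static void
  emit_move (long long *d, long long s, int is_64bit)
  {
    move_fn fn = is_64bit ? move64 : move32;
    fn (d, s);
  }
]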
@@ -13001,6 +13046,8 @@
(use (match_operand:MODEF 2 "general_operand" ""))]
"TARGET_USE_FANCY_MATH_387"
{
+ rtx (*gen_truncxf) (rtx, rtx);
+
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
@@ -13017,10 +13064,11 @@
/* Truncate the result properly for strict SSE math. */
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387)
- emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ gen_truncxf = gen_truncxf<mode>2;
else
- emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+ gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+ emit_insn (gen_truncxf (operands[0], op1));
DONE;
})
@@ -13069,6 +13117,8 @@
(use (match_operand:MODEF 2 "general_operand" ""))]
"TARGET_USE_FANCY_MATH_387"
{
+ rtx (*gen_truncxf) (rtx, rtx);
+
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
@@ -13086,10 +13136,11 @@
/* Truncate the result properly for strict SSE math. */
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387)
- emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ gen_truncxf = gen_truncxf<mode>2;
else
- emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+ gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+ emit_insn (gen_truncxf (operands[0], op1));
DONE;
})
@@ -15695,13 +15746,12 @@
}
else
{
- rtx (*cmp_insn)(rtx, rtx);
+ rtx (*gen_cmp) (rtx, rtx);
- if (TARGET_64BIT)
- cmp_insn = gen_cmpdi_1;
- else
- cmp_insn = gen_cmpsi_1;
- emit_insn (cmp_insn (countreg, countreg));
+ gen_cmp = (TARGET_64BIT
+ ? gen_cmpdi_1 : gen_cmpsi_1);
+
+ emit_insn (gen_cmp (countreg, countreg));
emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
operands[1], operands[2]));
}
@@ -16315,79 +16365,37 @@
;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
;;
;; in proper program order.
-(define_insn "pro_epilogue_adjust_stack_1"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" "0,r")
- (match_operand:SI 2 "immediate_operand" "i,i")))
- (clobber (reg:CC FLAGS_REG))
- (clobber (mem:BLK (scratch)))]
- "!TARGET_64BIT"
-{
- switch (get_attr_type (insn))
- {
- case TYPE_IMOV:
- return "mov{l}\t{%1, %0|%0, %1}";
-
- case TYPE_ALU:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- if (x86_maybe_negate_const_int (&operands[2], SImode))
- return "sub{l}\t{%2, %0|%0, %2}";
- return "add{l}\t{%2, %0|%0, %2}";
-
- default:
- operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{l}\t{%a2, %0|%0, %a2}";
- }
-}
- [(set (attr "type")
- (cond [(and (eq_attr "alternative" "0")
- (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
- (const_string "alu")
- (match_operand:SI 2 "const0_operand" "")
- (const_string "imov")
- ]
- (const_string "lea")))
- (set (attr "length_immediate")
- (cond [(eq_attr "type" "imov")
- (const_string "0")
- (and (eq_attr "type" "alu")
- (match_operand 2 "const128_operand" ""))
- (const_string "1")
- ]
- (const_string "*")))
- (set_attr "mode" "SI")])
-
-(define_insn "pro_epilogue_adjust_stack_rex64"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (plus:DI (match_operand:DI 1 "register_operand" "0,r")
- (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+(define_insn "pro_epilogue_adjust_stack_<mode>_1"
+ [(set (match_operand:P 0 "register_operand" "=r,r")
+ (plus:P (match_operand:P 1 "register_operand" "0,r")
+ (match_operand:P 2 "<immediate_operand>" "<i>,<i>")))
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))]
- "TARGET_64BIT"
+ ""
{
switch (get_attr_type (insn))
{
case TYPE_IMOV:
- return "mov{q}\t{%1, %0|%0, %1}";
+ return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
case TYPE_ALU:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
- if (x86_maybe_negate_const_int (&operands[2], DImode))
- return "sub{q}\t{%2, %0|%0, %2}";
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
- return "add{q}\t{%2, %0|%0, %2}";
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
default:
operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{q}\t{%a2, %0|%0, %a2}";
+ return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
}
}
[(set (attr "type")
(cond [(and (eq_attr "alternative" "0")
- (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "alu")
- (match_operand:DI 2 "const0_operand" "")
+ (match_operand:<MODE> 2 "const0_operand" "")
(const_string "imov")
]
(const_string "lea")))
@@ -16399,9 +16407,9 @@
(const_string "1")
]
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "pro_epilogue_adjust_stack_rex64_2"
+(define_insn "pro_epilogue_adjust_stack_di_2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI (match_operand:DI 1 "register_operand" "0,r")
(match_operand:DI 3 "immediate_operand" "i,i")))
@@ -16432,7 +16440,7 @@
UNSPECV_STACK_PROBE))
(set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1)))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && TARGET_STACK_PROBE"
+ "!TARGET_64BIT && ix86_target_stack_probe ()"
"call\t___chkstk"
[(set_attr "type" "multi")
(set_attr "length" "5")])
@@ -16445,7 +16453,7 @@
(clobber (reg:DI R10_REG))
(clobber (reg:DI R11_REG))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && TARGET_STACK_PROBE"
+ "TARGET_64BIT && ix86_target_stack_probe ()"
"call\t___chkstk"
[(set_attr "type" "multi")
(set_attr "length" "5")])
@@ -16453,7 +16461,7 @@
(define_expand "allocate_stack"
[(match_operand 0 "register_operand" "")
(match_operand 1 "general_operand" "")]
- "TARGET_STACK_PROBE"
+ "ix86_target_stack_probe ()"
{
rtx x;
@@ -16471,12 +16479,15 @@
}
else
{
- x = copy_to_mode_reg (Pmode, operands[1]);
+ rtx (*gen_allocate_stack_worker) (rtx, rtx);
+
if (TARGET_64BIT)
- x = gen_allocate_stack_worker_64 (x, x);
+ gen_allocate_stack_worker = gen_allocate_stack_worker_64;
else
- x = gen_allocate_stack_worker_32 (x, x);
- emit_insn (x);
+ gen_allocate_stack_worker = gen_allocate_stack_worker_32;
+
+ x = copy_to_mode_reg (Pmode, operands[1]);
+ emit_insn (gen_allocate_stack_worker (x, x));
}
emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
@@ -16488,13 +16499,37 @@
[(match_operand 0 "memory_operand" "")]
""
{
- if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx));
- else
- emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx));
+ rtx (*gen_ior3) (rtx, rtx, rtx);
+
+ gen_ior3 = (GET_MODE (operands[0]) == DImode
+ ? gen_iordi3 : gen_iorsi3);
+
+ emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx));
DONE;
})
+(define_insn "adjust_stack_and_probe<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+ UNSPECV_PROBE_STACK_RANGE))
+ (set (reg:P SP_REG)
+ (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "* return output_adjust_stack_and_probe (operands[0]);"
+ [(set_attr "type" "multi")])
+
+(define_insn "probe_stack_range<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "const_int_operand" "n")]
+ UNSPECV_PROBE_STACK_RANGE))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "* return output_probe_stack_range (operands[0], operands[2]);"
+ [(set_attr "type" "multi")])
+
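+
+;; [Editor's note: the two patterns above carry the static stack-checking
+;; support enabled by the STACK_CHECK_STATIC_BUILTIN definitions added to
+;; linux.h, linux64.h and sol2.h later in this diff: a large stack
+;; adjustment must touch every intervening page so the OS guard page
+;; cannot be jumped over.  A conceptual C model, assuming a 4 KiB probe
+;; interval; the real sequence is emitted as assembly by
+;; output_adjust_stack_and_probe and output_probe_stack_range:
+;;
+;;   /* Conceptual model only: touch one byte in every page of a pending
+;;      stack area so the guard page faults before the area is used.  */
+;;   #define PROBE_INTERVAL 4096
+;;
+;;   static void
+;;   probe_pages (char *sp, unsigned long size)
+;;   {
+;;     unsigned long off;
+;;     for (off = PROBE_INTERVAL; off < size; off += PROBE_INTERVAL)
+;;       *(volatile char *) (sp - off) = 0;
+;;     *(volatile char *) (sp - size) = 0;  /* final low-end probe */
+;;   }
+;; ]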
(define_expand "builtin_setjmp_receiver"
[(label_ref (match_operand 0 "" ""))]
"!TARGET_64BIT && flag_pic"
@@ -17403,7 +17438,7 @@
[(set (match_dup 0)
(plus:SI (mult:SI (match_dup 1) (match_dup 2))
(match_dup 1)))]
- { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
(define_peephole2
[(parallel
@@ -17419,7 +17454,7 @@
(set (match_dup 0)
(plus:SI (mult:SI (match_dup 0) (match_dup 2))
(match_dup 0)))]
- { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
(define_peephole2
[(parallel
@@ -17434,7 +17469,7 @@
[(set (match_dup 0)
(plus:DI (mult:DI (match_dup 1) (match_dup 2))
(match_dup 1)))]
- { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
(define_peephole2
[(parallel
@@ -17451,7 +17486,7 @@
(set (match_dup 0)
(plus:DI (mult:DI (match_dup 0) (match_dup 2))
(match_dup 0)))]
- { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
;; Imul $32bit_imm, mem, reg is vector decoded, while
;; imul $32bit_imm, reg, reg is direct decoded.
@@ -17466,7 +17501,7 @@
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
-"")
+ "")
(define_peephole2
[(match_scratch:SI 3 "r")
@@ -17479,7 +17514,7 @@
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
-"")
+ "")
(define_peephole2
[(match_scratch:SI 3 "r")
@@ -17491,9 +17526,10 @@
"TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
- (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
(clobber (reg:CC FLAGS_REG))])]
-"")
+ "")
;; imul $8/16bit_imm, regmem, reg is vector decoded.
;; Convert it into imul reg, reg
@@ -17575,6 +17611,8 @@
|| GET_MODE (operands[0]) == HImode))
|| GET_MODE (operands[0]) == SImode
|| (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+ && (rtx_equal_p (operands[0], operands[3])
+ || peep2_reg_dead_p (2, operands[0]))
/* We reorder load and the shift. */
&& !reg_overlap_mentioned_p (operands[0], operands[4])"
[(set (match_dup 5) (match_dup 4))
@@ -17785,178 +17823,6 @@
{ return ASM_SHORT "0x0b0f"; }
[(set_attr "length" "2")])
-(define_expand "sse_prologue_save"
- [(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" ""))
- (use (match_operand:DI 2 "immediate_operand" ""))
- (use (label_ref:DI (match_operand 3 "" "")))
- (clobber (match_operand:DI 4 "register_operand" ""))
- (use (match_dup 1))])]
- "TARGET_64BIT"
- "")
-
-;; Pre-reload version of prologue save. Until after prologue generation we don't know
-;; what the size of save instruction will be.
-;; Operand 0+operand 6 is the memory save area
-;; Operand 1 is number of registers to save (will get overwritten to operand 5)
-;; Operand 2 is number of non-vaargs SSE arguments
-;; Operand 3 is label starting the save block
-;; Operand 4 is used for temporary computation of jump address
-(define_insn "*sse_prologue_save_insn1"
- [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 6 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" "=r"))
- (use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))
- (clobber (match_operand:DI 4 "register_operand" "=&r"))
- (use (match_operand:DI 5 "register_operand" "1"))]
- "TARGET_64BIT
- && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
- && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
- "#"
- [(set_attr "type" "other")
- (set_attr "memory" "store")
- (set_attr "mode" "DI")])
-
-;; We know size of save instruction; expand the computation of jump address
-;; in the jumptable.
-(define_split
- [(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:DI 1 "register_operand" ""))
- (use (match_operand:DI 2 "const_int_operand" ""))
- (use (match_operand 3 "" ""))
- (clobber (match_operand:DI 4 "register_operand" ""))
- (use (match_operand:DI 5 "register_operand" ""))])]
- "reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
- (use (match_dup 1))
- (use (match_dup 2))
- (use (match_dup 3))
- (use (match_dup 5))])]
-{
- /* Movaps is 4 bytes, AVX and movsd is 5 bytes. */
- int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
-
- /* Compute address to jump to:
- label - eax*size + nnamed_sse_arguments*size. */
- if (size == 5)
- emit_insn (gen_rtx_SET (VOIDmode, operands[4],
- gen_rtx_PLUS
- (Pmode,
- gen_rtx_MULT (Pmode, operands[1],
- GEN_INT (4)),
- operands[1])));
- else if (size == 4)
- emit_insn (gen_rtx_SET (VOIDmode, operands[4],
- gen_rtx_MULT (Pmode, operands[1],
- GEN_INT (4))));
- else
- gcc_unreachable ();
- if (INTVAL (operands[2]))
- emit_move_insn
- (operands[1],
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- operands[3],
- GEN_INT (INTVAL (operands[2])
- * size))));
- else
- emit_move_insn (operands[1], operands[3]);
- emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
- operands[5] = GEN_INT (size);
-})
-
-(define_insn "sse_prologue_save_insn"
- [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 4 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI XMM0_REG)
- (reg:DI XMM1_REG)
- (reg:DI XMM2_REG)
- (reg:DI XMM3_REG)
- (reg:DI XMM4_REG)
- (reg:DI XMM5_REG)
- (reg:DI XMM6_REG)
- (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
- (use (match_operand:DI 1 "register_operand" "r"))
- (use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))
- (use (match_operand:DI 5 "const_int_operand" "i"))]
- "TARGET_64BIT
- && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
- && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-{
- int i;
- operands[0] = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode, operands[0], operands[4]));
- /* VEX instruction with a REX prefix will #UD. */
- if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
- gcc_unreachable ();
-
- output_asm_insn ("jmp\t%A1", operands);
- for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
- {
- operands[4] = adjust_address (operands[0], DImode, i*16);
- operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
- PUT_MODE (operands[4], TImode);
- if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
- output_asm_insn ("rex", operands);
- if (crtl->stack_alignment_needed < 128)
- output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
- else
- output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
- }
- (*targetm.asm_out.internal_label) (asm_out_file, "L",
- CODE_LABEL_NUMBER (operands[3]));
- return "";
-}
- [(set_attr "type" "other")
- (set_attr "length_immediate" "0")
- (set_attr "length_address" "0")
- ;; 2 bytes for jump and opernds[4] bytes for each save.
- (set (attr "length")
- (plus (const_int 2)
- (mult (symbol_ref ("INTVAL (operands[5])"))
- (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
- (set_attr "memory" "store")
- (set_attr "modrm" "0")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DI")])
-
(define_expand "prefetch"
[(prefetch (match_operand 0 "address_operand" "")
(match_operand:SI 1 "const_int_operand" "")
@@ -18444,6 +18310,71 @@
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+(define_insn "rdfsbase<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDFSBASE))]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "rdfsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "rdgsbase<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDGSBASE))]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "rdgsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "wrfsbase<mode>"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+ UNSPECV_WRFSBASE)]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "wrfsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "wrgsbase<mode>"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+ UNSPECV_WRGSBASE)]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "wrgsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_expand "rdrand<mode>"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
+ "TARGET_RDRND"
+{
+ rtx retry_label, insn, ccc;
+
+ retry_label = gen_label_rtx ();
+
+ emit_label (retry_label);
+
+ /* Generate rdrand. */
+ emit_insn (gen_rdrand<mode>_1 (operands[0]));
+
+ /* Retry if the carry flag isn't valid. */
+ ccc = gen_rtx_REG (CCCmode, FLAGS_REG);
+ ccc = gen_rtx_EQ (VOIDmode, ccc, const0_rtx);
+ ccc = gen_rtx_IF_THEN_ELSE (VOIDmode, ccc, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, retry_label));
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, ccc));
+ JUMP_LABEL (insn) = retry_label;
+
+ DONE;
+})
+
+(define_insn "rdrand<mode>_1"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))]
+ "TARGET_RDRND"
+ "rdrand %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "1")])
+
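+;; [Editor's note: the rdrand<mode> expander above hides the retry loop:
+;; it re-executes rdrand until the carry flag signals a valid result, so
+;; the intrinsics declared in the immintrin.h hunk below return a value
+;; directly.  A hedged usage sketch (assumes -mrdrnd and a CPU with
+;; RDRAND):
+;;
+;;   #include <immintrin.h>
+;;
+;;   unsigned int
+;;   random_seed (void)
+;;   {
+;;     /* The retry-on-invalid loop is generated by the expander, so no
+;;        status check is needed at this level.  */
+;;     return _rdrand_u32 ();
+;;   }
+;; ]
+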
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 0afdd1197f6..f264c42a45d 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -363,3 +363,15 @@ Support PCLMUL built-in functions and code generation
msse2avx
Target Report Var(ix86_sse2avx)
Encode SSE instructions with VEX prefix
+
+mfsgsbase
+Target Report Mask(ISA_FSGSBASE) Var(ix86_isa_flags) VarExists Save
+Support FSGSBASE built-in functions and code generation
+
+mrdrnd
+Target Report Mask(ISA_RDRND) Var(ix86_isa_flags) VarExists Save
+Support RDRND built-in functions and code generation
+
+mf16c
+Target Report Mask(ISA_F16C) Var(ix86_isa_flags) VarExists Save
+Support F16C built-in functions and code generation
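
[Editor's note: an illustrative invocation enabling the three new ISA
extensions explicitly (hypothetical file name):

  gcc -O2 -mfsgsbase -mrdrnd -mf16c test.c

Each option also defines the corresponding preprocessor macro
(__FSGSBASE__, __RDRND__, __F16C__) that gates the intrinsics in the
immintrin.h hunk below.]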
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 7a2b9b9c63e..3e69060700a 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -56,4 +56,148 @@
#include <avxintrin.h>
#endif
+#ifdef __RDRND__
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand_u16 (void)
+{
+ return __builtin_ia32_rdrand16 ();
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand_u32 (void)
+{
+ return __builtin_ia32_rdrand32 ();
+}
+#endif /* __RDRND__ */
+
+#ifdef __x86_64__
+#ifdef __FSGSBASE__
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u32 (void)
+{
+ return __builtin_ia32_rdfsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u64 (void)
+{
+ return __builtin_ia32_rdfsbase64 ();
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u32 (void)
+{
+ return __builtin_ia32_rdgsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u64 (void)
+{
+ return __builtin_ia32_rdgsbase64 ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrfsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrfsbase64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrgsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrgsbase64 (__B);
+}
+#endif /* __FSGSBASE__ */
+
+#ifdef __RDRND__
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand_u64 (void)
+{
+ return __builtin_ia32_rdrand64 ();
+}
+#endif /* __RDRND__ */
+#endif /* __x86_64__ */
+
+#ifdef __F16C__
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtsh_ss (unsigned short __S)
+{
+ __v8hi __H = __extension__ (__v8hi){ __S, 0, 0, 0, 0, 0, 0, 0 };
+ __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
+ return __builtin_ia32_vec_ext_v4sf (__A, 0);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_ps (__m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtss_sh (float __F, const int __I)
+{
+ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
+ __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
+ return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_ph (__m128 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_ph (__m256 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
+}
+#else
+#define _cvtss_sh(__F, __I) \
+ (__extension__ \
+ ({ \
+ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \
+ __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \
+ (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \
+ }))
+
+#define _mm_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I)))
+
+#define _mm256_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I)))
+#endif
+
+#endif /* __F16C__ */
+
#endif /* _IMMINTRIN_H_INCLUDED */
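
[Editor's note: the FSGSBASE intrinsics above map directly onto the
rdfsbase/rdgsbase/wrfsbase/wrgsbase patterns added in i386.md.  A hedged
usage sketch (x86-64 only; assumes -mfsgsbase, a CPU with FSGSBASE, and an
operating system that enables the instructions, otherwise they fault):

  #include <immintrin.h>

  unsigned long long
  swap_fsbase (unsigned long long new_base)
  {
    /* Read and replace the FS segment base from user space.  */
    unsigned long long old_base = _readfsbase_u64 ();
    _writefsbase_u64 (new_base);
    return old_base;
  }
]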
diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h
index 2a31880e6d6..81dfd1e2509 100644
--- a/gcc/config/i386/linux.h
+++ b/gcc/config/i386/linux.h
@@ -209,6 +209,9 @@ along with GCC; see the file COPYING3. If not see
/* The stack pointer needs to be moved while checking the stack. */
#define STACK_CHECK_MOVING_SP 1
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index 867de59a5ec..33b4dc9cd8f 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -112,6 +112,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* The stack pointer needs to be moved while checking the stack. */
#define STACK_CHECK_MOVING_SP 1
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index e08ea1cd90c..264abf1b148 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -245,5 +245,5 @@ __enable_execute_stack (void *addr) \
#define LIBGCC_SONAME "libgcc_s" LIBGCC_EH_EXTN "-1.dll"
/* We should find a way to not have to update this manually. */
-#define LIBGCJ_SONAME "libgcj" /*LIBGCC_EH_EXTN*/ "-11.dll"
+#define LIBGCJ_SONAME "libgcj" /*LIBGCC_EH_EXTN*/ "-12.dll"
diff --git a/gcc/config/i386/netware.c b/gcc/config/i386/netware.c
index fa9ffbbeac1..555571adb44 100644
--- a/gcc/config/i386/netware.c
+++ b/gcc/config/i386/netware.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "langhooks.h"
#include "ggc.h"
diff --git a/gcc/config/i386/nwld.c b/gcc/config/i386/nwld.c
index 9ae2aad3d35..ac0da7c3f00 100644
--- a/gcc/config/i386/nwld.c
+++ b/gcc/config/i386/nwld.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
+#include "diagnostic-core.h"
#include "toplev.h"
void
diff --git a/gcc/config/i386/sol2-10.h b/gcc/config/i386/sol2-10.h
index d57ed7f9d69..c7fdec9a1a3 100644
--- a/gcc/config/i386/sol2-10.h
+++ b/gcc/config/i386/sol2-10.h
@@ -145,7 +145,3 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_ASM_NAMED_SECTION
#define TARGET_ASM_NAMED_SECTION i386_solaris_elf_named_section
-
-#undef SUBTARGET_RETURN_IN_MEMORY
-#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
- ix86_sol10_return_in_memory (TYPE, FNTYPE)
diff --git a/gcc/config/i386/sol2-unwind.h b/gcc/config/i386/sol2-unwind.h
index 41ffb03b6cd..d93b60c781c 100644
--- a/gcc/config/i386/sol2-unwind.h
+++ b/gcc/config/i386/sol2-unwind.h
@@ -1,5 +1,5 @@
/* DWARF2 EH unwinding support for AMD x86-64 and x86.
- Copyright (C) 2009 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
This file is part of GCC.
@@ -26,6 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
state data appropriately. See unwind-dw2.c for the structs. */
#include <ucontext.h>
+#include <sys/frame.h>
#ifdef __x86_64__
@@ -39,7 +40,7 @@ x86_64_fallback_frame_state (struct _Unwind_Context *context,
mcontext_t *mctx;
long new_cfa;
- if (/* Solaris 2.10
+ if (/* Solaris 10+
------------
<__sighndlr+0>: push %rbp
<__sighndlr+1>: mov %rsp,%rbp
@@ -47,15 +48,41 @@ x86_64_fallback_frame_state (struct _Unwind_Context *context,
<__sighndlr+6>: leaveq <--- PC
<__sighndlr+7>: retq */
*(unsigned long *)(pc - 6) == 0xc3c9d1ffe5894855)
- /* We need to move up four frames (the kernel frame, the signal frame,
- the call_user_handler frame and the __sighndlr frame). Two of them
- have the minimum stack frame size (kernel and __sighndlr frames),
- the signal frame has a stack frame size of 32 and there is another
- with a stack frame size of 112 bytes (the call_user_handler frame).
- The ucontext_t structure is after this offset. */
+
+ /* We need to move up three frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler
+ sigacthandler
+ <kernel>
+
+ context->cfa points into the frame after the saved frame pointer and
+ saved pc (struct frame).
+
+ The ucontext_t structure is in the kernel frame after the signal
+ number and a siginfo_t *. Since the frame sizes vary even within
+ Solaris 10 updates, we need to walk the stack to get there. */
{
- int off = 16 + 16 + 32 + 112;
- mctx = &((ucontext_t *) (context->cfa + off))->uc_mcontext;
+ struct frame *fp = (struct frame *) context->cfa - 1;
+ struct handler_args {
+ int signo;
+ siginfo_t *sip;
+ ucontext_t ucontext;
+ } *handler_args;
+ ucontext_t *ucp;
+
+ /* Next frame: __sighndlr frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+ /* call_user_handler frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+ /* sigacthandler frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+
+ /* The argument area precedes the struct frame. */
+ handler_args = (struct handler_args *) (fp + 1);
+ ucp = &handler_args->ucontext;
+ mctx = &ucp->uc_mcontext;
}
else
return _URC_END_OF_STACK;
@@ -117,8 +144,8 @@ x86_fallback_frame_state (struct _Unwind_Context *context,
mcontext_t *mctx;
long new_cfa;
- if (/* Solaris 2.8 - single thread
- -------------------------
+ if (/* Solaris 8 - single-threaded
+ ----------------------------
<sigacthandler+17>: mov 0x10(%ebp),%esi
<sigacthandler+20>: push %esi
<sigacthandler+21>: pushl 0xc(%ebp)
@@ -135,7 +162,7 @@ x86_fallback_frame_state (struct _Unwind_Context *context,
&& *(unsigned long *)(pc - 4) == 0x8814ff00
&& *(unsigned long *)(pc - 0) == 0x560cc483)
- || /* Solaris 2.8 - multi thread
+ || /* Solaris 8 - multi-threaded
---------------------------
<__sighndlr+0>: push %ebp
<__sighndlr+1>: mov %esp,%ebp
@@ -149,8 +176,26 @@ x86_fallback_frame_state (struct _Unwind_Context *context,
&& *(unsigned long *)(pc - 7) == 0x0875ff0c
&& *(unsigned long *)(pc - 3) == 0xc91455ff)
- || /* Solaris 2.10
- ------------
+ || /* Solaris 9 - single-threaded
+ ----------------------------
+ <sigacthandler+16>: mov 0x244(%ebx),%ecx
+ <sigacthandler+22>: mov 0x8(%ebp),%eax
+ <sigacthandler+25>: mov (%ecx,%eax,4),%ecx
+ <sigacthandler+28>: pushl 0x10(%ebp)
+ <sigacthandler+31>: pushl 0xc(%ebp)
+ <sigacthandler+34>: push %eax
+ <sigacthandler+35>: call *%ecx
+ <sigacthandler+37>: add $0xc,%esp <--- PC
+ <sigacthandler+40>: pushl 0x10(%ebp) */
+ (*(unsigned long *)(pc - 21) == 0x2448b8b
+ && *(unsigned long *)(pc - 17) == 0x458b0000
+ && *(unsigned long *)(pc - 13) == 0x810c8b08
+ && *(unsigned long *)(pc - 9) == 0xff1075ff
+ && *(unsigned long *)(pc - 5) == 0xff500c75
+ && *(unsigned long *)(pc - 1) == 0xcc483d1)
+
+ || /* Solaris 9 - multi-threaded, Solaris 10
+ ---------------------------------------
<__sighndlr+0>: push %ebp
<__sighndlr+1>: mov %esp,%ebp
<__sighndlr+3>: pushl 0x10(%ebp)
@@ -164,7 +209,43 @@ x86_fallback_frame_state (struct _Unwind_Context *context,
&& *(unsigned long *)(pc - 11) == 0x75ff1075
&& *(unsigned long *)(pc - 7) == 0x0875ff0c
&& *(unsigned long *)(pc - 3) == 0x831455ff
- && *(unsigned long *)(pc + 1) == 0xc3c90cc4))
+ && *(unsigned long *)(pc + 1) == 0xc3c90cc4)
+
+ || /* Solaris 11 before snv_125
+ --------------------------
+ <__sighndlr+0> push %ebp
+ <__sighndlr+1> mov %esp,%ebp
+ <__sighndlr+4> pushl 0x10(%ebp)
+ <__sighndlr+6> pushl 0xc(%ebp)
+ <__sighndlr+9> pushl 0x8(%ebp)
+ <__sighndlr+12> call *0x14(%ebp)
+ <__sighndlr+15> add $0xc,%esp
+ <__sighndlr+18> leave <--- PC
+ <__sighndlr+19> ret */
+ (*(unsigned long *)(pc - 18) == 0xffec8b55
+ && *(unsigned long *)(pc - 14) == 0x7fff107f
+ && *(unsigned long *)(pc - 10) == 0x0875ff0c
+ && *(unsigned long *)(pc - 6) == 0x83145fff
+ && *(unsigned long *)(pc - 1) == 0xc3c90cc4)
+
+ || /* Solaris 11 since snv_125
+ -------------------------
+ <__sighndlr+0> push %ebp
+ <__sighndlr+1> mov %esp,%ebp
+ <__sighndlr+3> and $0xfffffff0,%esp
+ <__sighndlr+6> sub $0x4,%esp
+ <__sighndlr+9> pushl 0x10(%ebp)
+ <__sighndlr+12> pushl 0xc(%ebp)
+ <__sighndlr+15> pushl 0x8(%ebp)
+ <__sighndlr+18> call *0x14(%ebp)
+ <__sighndlr+21> leave <--- PC
+ <__sighndlr+22> ret */
+ (*(unsigned long *)(pc - 21) == 0x83ec8b55
+ && *(unsigned long *)(pc - 17) == 0xec83f0e4
+ && *(unsigned long *)(pc - 13) == 0x1075ff04
+ && *(unsigned long *)(pc - 9) == 0xff0c75ff
+ && *(unsigned long *)(pc - 5) == 0x55ff0875
+ && (*(unsigned long *)(pc - 1) & 0x00ffffff) == 0x00c3c914))
{
struct handler_args {
int signo;
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index 11eaa15dd80..57f75ea5092 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -140,6 +140,10 @@ along with GCC; see the file COPYING3. If not see
/* Register the Solaris-specific #pragma directives. */
#define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas ()
+#undef SUBTARGET_RETURN_IN_MEMORY
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ ix86_solaris_return_in_memory (TYPE, FNTYPE)
+
/* Output a simple call for .init/.fini. */
#define ASM_OUTPUT_CALL(FILE, FN) \
do \
@@ -154,6 +158,9 @@ along with GCC; see the file COPYING3. If not see
#undef X86_FILE_START_VERSION_DIRECTIVE
#define X86_FILE_START_VERSION_DIRECTIVE false
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
/* Only recent versions of Solaris 11 ld properly support hidden .gnu.linkonce
sections, so don't use them. */
#ifndef TARGET_GNU_LD
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f5e716f2149..5ac0da875b6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12322,3 +12322,81 @@
(set_attr "length_immediate" "1,*")
(set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")])
+
+(define_insn "vcvtph2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
+ UNSPEC_VCVTPH2PS)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 1) (const_int 2)])))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vcvtph2ps_load"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
+ UNSPEC_VCVTPH2PS))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vcvtph2ps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VCVTPH2PS))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "vcvtps2ph"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ UNSPEC_VCVTPS2PH)
+ (match_dup 3)))]
+ "TARGET_F16C"
+ "operands[3] = CONST0_RTX (V4HImode);")
+
+(define_insn "*vcvtps2ph"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 2 "immediate_operand" "N")]
+ UNSPEC_VCVTPS2PH)
+ (match_operand:V4HI 3 "const0_operand" "")))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vcvtps2ph_store"
+ [(set (match_operand:V4HI 0 "memory_operand" "=m")
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 2 "immediate_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "vcvtps2ph256"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
+ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:SI 2 "immediate_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
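
[Editor's note: these patterns implement the half-precision conversions
behind the _cvtsh_ss/_cvtss_sh and _mm*_cvtph_ps/_mm*_cvtps_ph intrinsics
added to immintrin.h above.  A hedged round-trip sketch (assumes -mf16c and
F16C hardware; the immediate 0 selects round-to-nearest):

  #include <immintrin.h>

  float
  round_trip_half (float f)
  {
    unsigned short h = _cvtss_sh (f, 0);  /* float -> binary16 */
    return _cvtsh_ss (h);                 /* binary16 -> float */
  }
]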
diff --git a/gcc/config/i386/vx-common.h b/gcc/config/i386/vx-common.h
index b4bea845fcd..cc7ea75ce40 100644
--- a/gcc/config/i386/vx-common.h
+++ b/gcc/config/i386/vx-common.h
@@ -1,5 +1,5 @@
/* IA32 VxWorks and VxWorks AE target definitions.
- Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc.
This file is part of GCC.
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
-/* VxWorks uses the same ABI as Solaris 10. */
+/* VxWorks uses the same ABI as Solaris 2. */
#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
- ix86_sol10_return_in_memory (TYPE, FNTYPE)
+ ix86_solaris_return_in_memory (TYPE, FNTYPE)
diff --git a/gcc/config/i386/winnt-cxx.c b/gcc/config/i386/winnt-cxx.c
index 9ae7ceda8a2..9191c08a676 100644
--- a/gcc/config/i386/winnt-cxx.c
+++ b/gcc/config/i386/winnt-cxx.c
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see
#include "cp/cp-tree.h" /* This is why we're a separate module. */
#include "flags.h"
#include "tm_p.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "hashtab.h"
@@ -98,12 +99,12 @@ i386_pe_adjust_class_at_definition (tree t)
if (lookup_attribute ("dllexport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
{
/* Check static VAR_DECL's. */
- for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
+ for (member = TYPE_FIELDS (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == VAR_DECL)
maybe_add_dllexport (member);
/* Check FUNCTION_DECL's. */
- for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
+ for (member = TYPE_METHODS (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == FUNCTION_DECL)
{
tree thunk;
@@ -115,7 +116,7 @@ i386_pe_adjust_class_at_definition (tree t)
maybe_add_dllexport (thunk);
}
/* Check vtables */
- for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
+ for (member = CLASSTYPE_VTABLES (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == VAR_DECL)
maybe_add_dllexport (member);
}
@@ -131,12 +132,12 @@ i386_pe_adjust_class_at_definition (tree t)
definition. */
/* Check static VAR_DECL's. */
- for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
+ for (member = TYPE_FIELDS (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == VAR_DECL)
maybe_add_dllimport (member);
/* Check FUNCTION_DECL's. */
- for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
+ for (member = TYPE_METHODS (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == FUNCTION_DECL)
{
tree thunk;
@@ -144,12 +145,12 @@ i386_pe_adjust_class_at_definition (tree t)
/* Also add the attribute to its thunks. */
for (thunk = DECL_THUNKS (member); thunk;
- thunk = TREE_CHAIN (thunk))
+ thunk = DECL_CHAIN (thunk))
maybe_add_dllimport (thunk);
}
/* Check vtables */
- for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
+ for (member = CLASSTYPE_VTABLES (t); member; member = DECL_CHAIN (member))
if (TREE_CODE (member) == VAR_DECL)
maybe_add_dllimport (member);
diff --git a/gcc/config/i386/winnt-stubs.c b/gcc/config/i386/winnt-stubs.c
index a9c7cd7398f..f075e9626dc 100644
--- a/gcc/config/i386/winnt-stubs.c
+++ b/gcc/config/i386/winnt-stubs.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "hashtab.h"
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index c20a2ae89fc..60a8b79d64a 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "hashtab.h"
#include "langhooks.h"
@@ -722,7 +723,7 @@ i386_pe_file_end (void)
drectve_section ();
for (q = export_head; q != NULL; q = q->next)
{
- fprintf (asm_out_file, "\t.ascii \" -export:%s%s\"\n",
+ fprintf (asm_out_file, "\t.ascii \" -export:\\\"%s\\\"%s\"\n",
default_strip_name_encoding (q->name),
(q->is_data ? ",data" : ""));
}
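
[Editor's note: with the quoting change above, an exported function foo (a
hypothetical name) now produces

  .ascii " -export:\"foo\""

in the .drectve section, and a data symbol additionally gets the ",data"
suffix; the escaped quotes keep names containing characters special to the
linker intact.]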