Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/darwin.h      |    7
-rw-r--r-- | gcc/config/i386/emmintrin.h   |    2
-rw-r--r-- | gcc/config/i386/i386-c.c      |  344
-rw-r--r-- | gcc/config/i386/i386-protos.h |    8
-rw-r--r-- | gcc/config/i386/i386.c        | 1859
-rw-r--r-- | gcc/config/i386/i386.h        |  261
-rw-r--r-- | gcc/config/i386/i386.md       |  501
-rw-r--r-- | gcc/config/i386/i386.opt      |  173
-rw-r--r-- | gcc/config/i386/mmx.md        |    8
-rw-r--r-- | gcc/config/i386/sse.md        |  175
-rw-r--r-- | gcc/config/i386/t-i386        |   13
-rw-r--r-- | gcc/config/i386/winnt.c       |    6
12 files changed, 2514 insertions, 843 deletions
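[Editor's note] The headline change in this commit is function-specific option support on i386: the new i386-c.c wires "#pragma GCC option" into the preprocessor so the target macros (__SSE2__, __SSE4_2__, and friends) track the currently selected ISA, and attribute((option(...))) routes through override_options() to switch code generation per function. A minimal sketch of what the new hooks enable, assuming the "#pragma GCC option" spelling used in this patch's comments; the identifier isa_probe is hypothetical:

    /* Illustrative only: ix86_pragma_option_parse undefines the macros for
       options being turned off and defines the ones being turned on, so
       preprocessor tests below the pragma see the new ISA set.  */
    #pragma GCC option ("sse4.2")
    #ifdef __SSE4_2__
    static const char isa_probe[] = "sse4.2";    /* taken after the pragma */
    #else
    static const char isa_probe[] = "baseline";
    #endif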
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h index 6001f64b42a..a1defcf9bc6 100644 --- a/gcc/config/i386/darwin.h +++ b/gcc/config/i386/darwin.h @@ -75,6 +75,9 @@ along with GCC; see the file COPYING3. If not see #undef STACK_BOUNDARY #define STACK_BOUNDARY 128 +#undef MAIN_STACK_BOUNDARY +#define MAIN_STACK_BOUNDARY 128 + /* Since we'll never want a stack boundary less aligned than 128 bits we need the extra work here otherwise bits of gcc get very grumpy when we ask for lower alignment. We could just reject values less @@ -263,8 +266,8 @@ extern void darwin_x86_file_end (void); : (n) >= 11 && (n) <= 18 ? (n) + 1 \ : (n)) -#undef REGISTER_TARGET_PRAGMAS -#define REGISTER_TARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS() +#undef REGISTER_SUBTARGET_PRAGMAS +#define REGISTER_SUBTARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS() #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 933dcd61e63..c6590dce4d4 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -726,7 +726,7 @@ _mm_movpi64_epi64 (__m64 __A) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_epi64 (__m128i __A) { - return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A)); + return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); } /* Create a vector of zeros. */ diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c new file mode 100644 index 00000000000..f0a3a17f9f6 --- /dev/null +++ b/gcc/config/i386/i386-c.c @@ -0,0 +1,344 @@ +/* Subroutines used for macro/preprocessor support on the ia-32. + Copyright (C) 2008 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "flags.h" +#include "c-common.h" +#include "ggc.h" +#include "target.h" +#include "target-def.h" +#include "cpplib.h" +#include "c-pragma.h" + +static bool ix86_pragma_option_parse (tree); +static void ix86_target_macros_internal + (int, enum processor_type, enum processor_type, enum fpmath_unit, + void (*def_or_undef) (cpp_reader *, const char *)); + + +/* Internal function to either define or undef the appropriate system + macros. */ +static void +ix86_target_macros_internal (int isa_flag, + enum processor_type arch, + enum processor_type tune, + enum fpmath_unit fpmath, + void (*def_or_undef) (cpp_reader *, + const char *)) +{ + /* For some of the k6/pentium varients there weren't seperate ISA bits to + identify which tune/arch flag was passed, so figure it out here. 
*/ + size_t arch_len = strlen (ix86_arch_string); + size_t tune_len = strlen (ix86_tune_string); + int last_arch_char = ix86_arch_string[arch_len - 1]; + int last_tune_char = ix86_tune_string[tune_len - 1]; + + /* Built-ins based on -march=. */ + switch (arch) + { + case PROCESSOR_I386: + break; + case PROCESSOR_I486: + def_or_undef (parse_in, "__i486"); + def_or_undef (parse_in, "__i486__"); + break; + case PROCESSOR_PENTIUM: + def_or_undef (parse_in, "__i586"); + def_or_undef (parse_in, "__i586__"); + def_or_undef (parse_in, "__pentium"); + def_or_undef (parse_in, "__pentium__"); + if (isa_flag & OPTION_MASK_ISA_MMX) + def_or_undef (parse_in, "__pentium_mmx__"); + break; + case PROCESSOR_PENTIUMPRO: + def_or_undef (parse_in, "__i686"); + def_or_undef (parse_in, "__i686__"); + def_or_undef (parse_in, "__pentiumpro"); + def_or_undef (parse_in, "__pentiumpro__"); + break; + case PROCESSOR_GEODE: + def_or_undef (parse_in, "__geode"); + def_or_undef (parse_in, "__geode__"); + break; + case PROCESSOR_K6: + def_or_undef (parse_in, "__k6"); + def_or_undef (parse_in, "__k6__"); + if (last_arch_char == '2') + def_or_undef (parse_in, "__k6_2__"); + else if (last_arch_char == '3') + def_or_undef (parse_in, "__k6_3__"); + else if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__k6_3__"); + break; + case PROCESSOR_ATHLON: + def_or_undef (parse_in, "__athlon"); + def_or_undef (parse_in, "__athlon__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__athlon_sse__"); + break; + case PROCESSOR_K8: + def_or_undef (parse_in, "__k8"); + def_or_undef (parse_in, "__k8__"); + break; + case PROCESSOR_AMDFAM10: + def_or_undef (parse_in, "__amdfam10"); + def_or_undef (parse_in, "__amdfam10__"); + break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__pentium4"); + def_or_undef (parse_in, "__pentium4__"); + break; + case PROCESSOR_NOCONA: + def_or_undef (parse_in, "__nocona"); + def_or_undef (parse_in, "__nocona__"); + break; + case PROCESSOR_CORE2: + def_or_undef (parse_in, "__core2"); + def_or_undef (parse_in, "__core2__"); + break; + /* use PROCESSOR_max to not set/unset the arch macro. */ + case PROCESSOR_max: + break; + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + gcc_unreachable (); + } + + /* Built-ins based on -mtune=. 
*/ + switch (tune) + { + case PROCESSOR_I386: + def_or_undef (parse_in, "__tune_i386__"); + break; + case PROCESSOR_I486: + def_or_undef (parse_in, "__tune_i486__"); + break; + case PROCESSOR_PENTIUM: + def_or_undef (parse_in, "__tune_i586__"); + def_or_undef (parse_in, "__tune_pentium__"); + if (last_tune_char == 'x') + def_or_undef (parse_in, "__tune_pentium_mmx__"); + break; + case PROCESSOR_PENTIUMPRO: + def_or_undef (parse_in, "__tune_i686__"); + def_or_undef (parse_in, "__tune_pentiumpro__"); + switch (last_tune_char) + { + case '3': + def_or_undef (parse_in, "__tune_pentium3__"); + /* FALLTHRU */ + case '2': + def_or_undef (parse_in, "__tune_pentium2__"); + break; + } + break; + case PROCESSOR_GEODE: + def_or_undef (parse_in, "__tune_geode__"); + break; + case PROCESSOR_K6: + def_or_undef (parse_in, "__tune_k6__"); + if (last_tune_char == '2') + def_or_undef (parse_in, "__tune_k6_2__"); + else if (last_tune_char == '3') + def_or_undef (parse_in, "__tune_k6_3__"); + else if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__tune_k6_3__"); + break; + case PROCESSOR_ATHLON: + def_or_undef (parse_in, "__tune_athlon__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__tune_athlon_sse__"); + break; + case PROCESSOR_K8: + def_or_undef (parse_in, "__tune_k8__"); + break; + case PROCESSOR_AMDFAM10: + def_or_undef (parse_in, "__tune_amdfam10__"); + break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__tune_pentium4__"); + break; + case PROCESSOR_NOCONA: + def_or_undef (parse_in, "__tune_nocona__"); + break; + case PROCESSOR_CORE2: + def_or_undef (parse_in, "__tune_core2__"); + break; + case PROCESSOR_GENERIC32: + case PROCESSOR_GENERIC64: + break; + /* use PROCESSOR_max to not set/unset the tune macro. */ + case PROCESSOR_max: + break; + } + + if (isa_flag & OPTION_MASK_ISA_MMX) + def_or_undef (parse_in, "__MMX__"); + if (isa_flag & OPTION_MASK_ISA_3DNOW) + def_or_undef (parse_in, "__3dNOW__"); + if (isa_flag & OPTION_MASK_ISA_3DNOW_A) + def_or_undef (parse_in, "__3dNOW_A__"); + if (isa_flag & OPTION_MASK_ISA_SSE) + def_or_undef (parse_in, "__SSE__"); + if (isa_flag & OPTION_MASK_ISA_SSE2) + def_or_undef (parse_in, "__SSE2__"); + if (isa_flag & OPTION_MASK_ISA_SSE3) + def_or_undef (parse_in, "__SSE3__"); + if (isa_flag & OPTION_MASK_ISA_SSSE3) + def_or_undef (parse_in, "__SSSE3__"); + if (isa_flag & OPTION_MASK_ISA_SSE4_1) + def_or_undef (parse_in, "__SSE4_1__"); + if (isa_flag & OPTION_MASK_ISA_SSE4_2) + def_or_undef (parse_in, "__SSE4_2__"); + if (isa_flag & OPTION_MASK_ISA_AES) + def_or_undef (parse_in, "__AES__"); + if (isa_flag & OPTION_MASK_ISA_PCLMUL) + def_or_undef (parse_in, "__PCLMUL__"); + if (isa_flag & OPTION_MASK_ISA_SSE4A) + def_or_undef (parse_in, "__SSE4A__"); + if (isa_flag & OPTION_MASK_ISA_SSE5) + def_or_undef (parse_in, "__SSE5__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) + def_or_undef (parse_in, "__SSE_MATH__"); + if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) + def_or_undef (parse_in, "__SSE2_MATH__"); +} + + +/* Hook to validate the current #pragma option and set the state, and update + the macros based on what was changed. 
*/ + +static bool +ix86_pragma_option_parse (tree args) +{ + tree prev_tree = build_target_option_node (); + tree cur_tree; + struct cl_target_option *prev_opt; + struct cl_target_option *cur_opt; + int prev_isa; + int cur_isa; + int diff_isa; + enum processor_type prev_arch; + enum processor_type prev_tune; + enum processor_type cur_arch; + enum processor_type cur_tune; + + if (! args) + { + cur_tree = target_option_default_node; + cl_target_option_restore (TREE_TARGET_OPTION (cur_tree)); + } + else + { + cur_tree = ix86_valid_option_attribute_tree (args); + if (!cur_tree) + return false; + } + + target_option_current_node = cur_tree; + + /* Figure out the previous/current isa, arch, tune and the differences. */ + prev_opt = TREE_TARGET_OPTION (prev_tree); + cur_opt = TREE_TARGET_OPTION (cur_tree); + prev_isa = prev_opt->ix86_isa_flags; + cur_isa = cur_opt->ix86_isa_flags; + diff_isa = (prev_isa ^ cur_isa); + prev_arch = prev_opt->arch; + prev_tune = prev_opt->tune; + cur_arch = cur_opt->arch; + cur_tune = cur_opt->tune; + + /* If the same processor is used for both previous and current options, don't + change the macros. */ + if (cur_arch == prev_arch) + cur_arch = prev_arch = PROCESSOR_max; + + if (cur_tune == prev_tune) + cur_tune = prev_tune = PROCESSOR_max; + + /* Undef all of the macros for that are no longer current. */ + ix86_target_macros_internal (prev_isa & diff_isa, + prev_arch, + prev_tune, + prev_opt->fpmath, + cpp_undef); + + /* Define all of the macros for new options that were just turned on. */ + ix86_target_macros_internal (cur_isa & diff_isa, + cur_arch, + cur_tune, + cur_opt->fpmath, + cpp_define); + + return true; +} + +/* Function to tell the preprocessor about the defines for the current target. */ + +void +ix86_target_macros (void) +{ + /* 32/64-bit won't change with target specific options, so do the assert and + builtin_define_std calls here. */ + if (TARGET_64BIT) + { + cpp_assert (parse_in, "cpu=x86_64"); + cpp_assert (parse_in, "machine=x86_64"); + cpp_define (parse_in, "__amd64"); + cpp_define (parse_in, "__amd64__"); + cpp_define (parse_in, "__x86_64"); + cpp_define (parse_in, "__x86_64__"); + } + else + { + cpp_assert (parse_in, "cpu=i386"); + cpp_assert (parse_in, "machine=i386"); + builtin_define_std ("i386"); + } + + ix86_target_macros_internal (ix86_isa_flags, + ix86_arch, + ix86_tune, + ix86_fpmath, + cpp_define); +} + + +/* Register target pragmas. We need to add the hook for parsing #pragma GCC + option here rather than in i386.c since it will pull in various preprocessor + functions, and those are not present in languages like fortran without a + preprocessor. */ + +void +ix86_register_pragmas (void) +{ + /* Update pragma hook to allow parsing #pragma GCC option. */ + targetm.target_option.pragma_parse = ix86_pragma_option_parse; + +#ifdef REGISTER_SUBTARGET_PRAGMAS + REGISTER_SUBTARGET_PRAGMAS (); +#endif +} diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 634a4254f06..3276bd8e202 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. 
*/ /* Functions in i386.c */ -extern void override_options (void); +extern void override_options (bool); extern void optimization_options (int, int); extern int ix86_can_use_return_insn_p (void); @@ -28,6 +28,7 @@ extern int ix86_frame_pointer_required (void); extern void ix86_setup_frame_addresses (void); extern void ix86_file_end (void); +extern int ix86_can_eliminate (int, int); extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int); extern void ix86_expand_prologue (void); extern void ix86_expand_epilogue (int); @@ -202,6 +203,7 @@ extern int ix86_constant_alignment (tree, int); extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *); extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *); extern int x86_field_alignment (tree, int); +extern tree ix86_valid_option_attribute_tree (tree); #endif extern rtx ix86_tls_get_addr (void); @@ -215,6 +217,10 @@ extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int, bool); extern void ix86_expand_sse5_multiple_memory (rtx [], int, enum machine_mode); +/* In i386-c.c */ +extern void ix86_target_macros (void); +extern void ix86_register_pragmas (void); + /* In winnt.c */ extern void i386_pe_unique_section (tree, int); extern void i386_pe_declare_function_type (FILE *, const char *, int); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b63deb834bf..8da85e16e5c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -48,7 +48,7 @@ along with GCC; see the file COPYING3. If not see #include "target-def.h" #include "langhooks.h" #include "cgraph.h" -#include "tree-gimple.h" +#include "gimple.h" #include "dwarf2.h" #include "df.h" #include "tm-constrs.h" @@ -75,8 +75,8 @@ static rtx legitimize_dllimport_symbol (rtx, bool); #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}} -static const -struct processor_costs size_cost = { /* costs for tuning for size */ +const +struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of an add instruction */ COSTS_N_BYTES (3), /* cost of a lea instruction */ COSTS_N_BYTES (2), /* variable shift costs */ @@ -1210,7 +1210,11 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_GENERIC (m_GENERIC32 | m_GENERIC64) /* Feature tests against the various tunings. */ -unsigned int ix86_tune_features[X86_TUNE_LAST] = { +unsigned char ix86_tune_features[X86_TUNE_LAST]; + +/* Feature tests against the various tunings used to create ix86_tune_features + based on the processor mask. */ +static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results negatively, so enabling for Generic64 seems like good code size tradeoff. We can't enable it for 32bit generic because it does not @@ -1443,7 +1447,11 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = { }; /* Feature tests against the various architecture variations. */ -unsigned int ix86_arch_features[X86_ARCH_LAST] = { +unsigned char ix86_arch_features[X86_ARCH_LAST]; + +/* Feature tests against the various architecture variations, used to create + ix86_arch_features based on the processor mask. */ +static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. 
*/ ~(m_386 | m_486 | m_PENT | m_K6), @@ -1700,7 +1708,8 @@ static int ix86_regparm; /* -mstackrealign option */ extern int ix86_force_align_arg_pointer; -static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; +static const char ix86_force_align_arg_pointer_string[] + = "force_align_arg_pointer"; static rtx (*ix86_gen_leave) (void); static rtx (*ix86_gen_pop1) (rtx); @@ -1709,10 +1718,21 @@ static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx); static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); +static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); /* Preferred alignment for stack boundary in bits. */ unsigned int ix86_preferred_stack_boundary; +/* Alignment for incoming stack boundary in bits specified at + command line. */ +static unsigned int ix86_user_incoming_stack_boundary; + +/* Default alignment for incoming stack boundary in bits. */ +static unsigned int ix86_default_incoming_stack_boundary; + +/* Alignment for incoming stack boundary in bits. */ +unsigned int ix86_incoming_stack_boundary; + /* Values 1-5: see jump.c */ int ix86_branch_cost; @@ -1773,6 +1793,26 @@ static void ix86_compute_frame_layout (struct ix86_frame *); static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, rtx, rtx, int); +enum ix86_function_specific_strings +{ + IX86_FUNCTION_SPECIFIC_ARCH, + IX86_FUNCTION_SPECIFIC_TUNE, + IX86_FUNCTION_SPECIFIC_FPMATH, + IX86_FUNCTION_SPECIFIC_MAX +}; + +static char *ix86_target_string (int, int, const char *, const char *, + const char *, bool); +static void ix86_debug_options (void) ATTRIBUTE_UNUSED; +static void ix86_function_specific_save (struct cl_target_option *); +static void ix86_function_specific_restore (struct cl_target_option *); +static void ix86_function_specific_print (FILE *, int, + struct cl_target_option *); +static bool ix86_valid_option_attribute_p (tree, tree, tree, int); +static bool ix86_valid_option_attribute_inner_p (tree, char *[]); +static bool ix86_can_inline_p (tree, tree); +static void ix86_set_current_function (tree); + /* The svr4 ABI for the i386 says that records and unions are returned in memory. */ @@ -1780,6 +1820,10 @@ static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, #define DEFAULT_PCC_STRUCT_RETURN 1 #endif +/* Whether -mtune= or -march= were specified */ +static int ix86_tune_defaulted; +static int ix86_arch_specified; + /* Bit flags that specify the ISA we are compiling for. */ int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; @@ -1815,6 +1859,18 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE5_SET \ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) +/* AES and PCLMUL need SSE2 because they use xmm registers */ +#define OPTION_MASK_ISA_AES_SET \ + (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) +#define OPTION_MASK_ISA_PCLMUL_SET \ + (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) + +#define OPTION_MASK_ISA_ABM_SET \ + (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) +#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT +#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 +#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF + /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX and SSE ISAs are handled separately. 
*/ @@ -1844,14 +1900,73 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE4A_UNSET \ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) - #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 +#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES +#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL +#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM +#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT +#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 +#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF /* Vectorization library interface and handlers. */ tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); +/* Processor target table, indexed by processor number */ +struct ptt +{ + const struct processor_costs *cost; /* Processor costs */ + const int align_loop; /* Default alignments. */ + const int align_loop_max_skip; + const int align_jump; + const int align_jump_max_skip; + const int align_func; +}; + +static const struct ptt processor_target_table[PROCESSOR_max] = +{ + {&i386_cost, 4, 3, 4, 3, 4}, + {&i486_cost, 16, 15, 16, 15, 16}, + {&pentium_cost, 16, 7, 16, 7, 16}, + {&pentiumpro_cost, 16, 15, 16, 10, 16}, + {&geode_cost, 0, 0, 0, 0, 0}, + {&k6_cost, 32, 7, 32, 7, 32}, + {&athlon_cost, 16, 7, 16, 7, 16}, + {&pentium4_cost, 0, 0, 0, 0, 0}, + {&k8_cost, 16, 7, 16, 7, 16}, + {&nocona_cost, 0, 0, 0, 0, 0}, + {&core2_cost, 16, 10, 16, 10, 16}, + {&generic32_cost, 16, 7, 16, 7, 16}, + {&generic64_cost, 16, 10, 16, 10, 16}, + {&amdfam10_cost, 32, 24, 32, 7, 32} +}; + +static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = +{ + "generic", + "i386", + "i486", + "pentium", + "pentium-mmx", + "pentiumpro", + "pentium2", + "pentium3", + "pentium4", + "pentium-m", + "prescott", + "nocona", + "core2", + "geode", + "k6", + "k6-2", + "k6-3", + "athlon", + "athlon-4", + "k8", + "amdfam10" +}; + /* Implement TARGET_HANDLE_OPTION. 
*/ static bool @@ -2002,11 +2117,295 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mabm: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET; + } + return true; + + case OPT_mpopcnt: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET; + } + return true; + + case OPT_msahf: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET; + } + return true; + + case OPT_mcx16: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; + } + return true; + + case OPT_maes: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_AES_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET; + } + return true; + + case OPT_mpclmul: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET; + } + return true; + default: return true; } } + +/* Return a string the documents the current -m options. The caller is + responsible for freeing the string. */ + +static char * +ix86_target_string (int isa, int flags, const char *arch, const char *tune, + const char *fpmath, bool add_nl_p) +{ + struct ix86_target_opts + { + const char *option; /* option string */ + int mask; /* isa mask options */ + }; + + /* This table is ordered so that options like -msse5 or -msse4.2 that imply + preceding options while match those first. */ + static struct ix86_target_opts isa_opts[] = + { + { "-m64", OPTION_MASK_ISA_64BIT }, + { "-msse5", OPTION_MASK_ISA_SSE5 }, + { "-msse4a", OPTION_MASK_ISA_SSE4A }, + { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, + { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, + { "-mssse3", OPTION_MASK_ISA_SSSE3 }, + { "-msse3", OPTION_MASK_ISA_SSE3 }, + { "-msse2", OPTION_MASK_ISA_SSE2 }, + { "-msse", OPTION_MASK_ISA_SSE }, + { "-m3dnow", OPTION_MASK_ISA_3DNOW }, + { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, + { "-mmmx", OPTION_MASK_ISA_MMX }, + { "-mabm", OPTION_MASK_ISA_ABM }, + { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, + { "-maes", OPTION_MASK_ISA_AES }, + { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, + }; + + /* Flag options. 
*/ + static struct ix86_target_opts flag_opts[] = + { + { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, + { "-m80387", MASK_80387 }, + { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, + { "-malign-double", MASK_ALIGN_DOUBLE }, + { "-mcld", MASK_CLD }, + { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, + { "-mieee-fp", MASK_IEEE_FP }, + { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, + { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, + { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, + { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, + { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, + { "-mno-fused-madd", MASK_NO_FUSED_MADD }, + { "-mno-push-args", MASK_NO_PUSH_ARGS }, + { "-mno-red-zone", MASK_NO_RED_ZONE }, + { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, + { "-mrecip", MASK_RECIP }, + { "-mrtd", MASK_RTD }, + { "-msseregparm", MASK_SSEREGPARM }, + { "-mstack-arg-probe", MASK_STACK_PROBE }, + { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, + }; + + const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0]) + + sizeof (flag_opts) / sizeof (flag_opts[0]) + + 6)][2]; + + char isa_other[40]; + char target_other[40]; + unsigned num = 0; + unsigned i, j; + char *ret; + char *ptr; + size_t len; + size_t line_len; + size_t sep_len; + + memset (opts, '\0', sizeof (opts)); + + /* Add -march= option. */ + if (arch) + { + opts[num][0] = "-march="; + opts[num++][1] = arch; + } + + /* Add -mtune= option. */ + if (tune) + { + opts[num][0] = "-mtune="; + opts[num++][1] = tune; + } + + /* Pick out the options in isa options. */ + for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++) + { + if ((isa & isa_opts[i].mask) != 0) + { + opts[num++][0] = isa_opts[i].option; + isa &= ~ isa_opts[i].mask; + } + } + + if (isa && add_nl_p) + { + opts[num++][0] = isa_other; + sprintf (isa_other, "(other isa: 0x%x)", isa); + } + + /* Add flag options. */ + for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++) + { + if ((flags & flag_opts[i].mask) != 0) + { + opts[num++][0] = flag_opts[i].option; + flags &= ~ flag_opts[i].mask; + } + } + + if (flags && add_nl_p) + { + opts[num++][0] = target_other; + sprintf (target_other, "(other flags: 0x%x)", isa); + } + + /* Add -fpmath= option. */ + if (fpmath) + { + opts[num][0] = "-mfpmath="; + opts[num++][1] = fpmath; + } + + /* Any options? */ + if (num == 0) + return NULL; + + gcc_assert (num < sizeof (opts) / sizeof (opts[0])); + + /* Size the string. */ + len = 0; + sep_len = (add_nl_p) ? 3 : 1; + for (i = 0; i < num; i++) + { + len += sep_len; + for (j = 0; j < 2; j++) + if (opts[i][j]) + len += strlen (opts[i][j]); + } + + /* Build the string. */ + ret = ptr = (char *) xmalloc (len); + line_len = 0; + + for (i = 0; i < num; i++) + { + size_t len2[2]; + + for (j = 0; j < 2; j++) + len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; + + if (i != 0) + { + *ptr++ = ' '; + line_len++; + + if (add_nl_p && line_len + len2[0] + len2[1] > 70) + { + *ptr++ = '\\'; + *ptr++ = '\n'; + line_len = 0; + } + } + + for (j = 0; j < 2; j++) + if (opts[i][j]) + { + memcpy (ptr, opts[i][j], len2[j]); + ptr += len2[j]; + line_len += len2[j]; + } + } + + *ptr = '\0'; + gcc_assert (ret + len >= ptr); + + return ret; +} + +/* Function that is callable from the debugger to print the current + options. 
*/ +void +ix86_debug_options (void) +{ + char *opts = ix86_target_string (ix86_isa_flags, target_flags, + ix86_arch_string, ix86_tune_string, + ix86_fpmath_string, true); + if (opts) + { + fprintf (stderr, "%s\n\n", opts); + free (opts); + } + else + fprintf (stderr, "<no options>\n\n"); + + return; +} + /* Sometimes certain combinations of command options do not make sense on a particular target machine. You can define a macro `OVERRIDE_OPTIONS' to take account of this. This macro, if @@ -2017,68 +2416,17 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ void -override_options (void) +override_options (bool main_args_p) { int i; - int ix86_tune_defaulted = 0; - int ix86_arch_specified = 0; unsigned int ix86_arch_mask, ix86_tune_mask; + const char *prefix; + const char *suffix; + const char *sw; /* Comes from final.c -- no real reason to change it. */ #define MAX_CODE_ALIGN 16 - static struct ptt - { - const struct processor_costs *cost; /* Processor costs */ - const int align_loop; /* Default alignments. */ - const int align_loop_max_skip; - const int align_jump; - const int align_jump_max_skip; - const int align_func; - } - const processor_target_table[PROCESSOR_max] = - { - {&i386_cost, 4, 3, 4, 3, 4}, - {&i486_cost, 16, 15, 16, 15, 16}, - {&pentium_cost, 16, 7, 16, 7, 16}, - {&pentiumpro_cost, 16, 15, 16, 10, 16}, - {&geode_cost, 0, 0, 0, 0, 0}, - {&k6_cost, 32, 7, 32, 7, 32}, - {&athlon_cost, 16, 7, 16, 7, 16}, - {&pentium4_cost, 0, 0, 0, 0, 0}, - {&k8_cost, 16, 7, 16, 7, 16}, - {&nocona_cost, 0, 0, 0, 0, 0}, - {&core2_cost, 16, 10, 16, 10, 16}, - {&generic32_cost, 16, 7, 16, 7, 16}, - {&generic64_cost, 16, 10, 16, 10, 16}, - {&amdfam10_cost, 32, 24, 32, 7, 32} - }; - - static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = - { - "generic", - "i386", - "i486", - "pentium", - "pentium-mmx", - "pentiumpro", - "pentium2", - "pentium3", - "pentium4", - "pentium-m", - "prescott", - "nocona", - "core2", - "geode", - "k6", - "k6-2", - "k6-3", - "athlon", - "athlon-4", - "k8", - "amdfam10" - }; - enum pta_flags { PTA_SSE = 1 << 0, @@ -2197,6 +2545,21 @@ override_options (void) int const pta_size = ARRAY_SIZE (processor_alias_table); + /* Set up prefix/suffix so the error messages refer to either the command + line argument, or the attribute(option). */ + if (main_args_p) + { + prefix = "-m"; + suffix = ""; + sw = "switch"; + } + else + { + prefix = "option(\""; + suffix = "\")"; + sw = "attribute"; + } + #ifdef SUBTARGET_OVERRIDE_OPTIONS SUBTARGET_OVERRIDE_OPTIONS; #endif @@ -2246,8 +2609,15 @@ override_options (void) else ix86_tune_string = "generic32"; } + /* If this call is for setting the option attribute, allow the + generic32/generic64 that was previously set. 
*/ + else if (!main_args_p + && (!strcmp (ix86_tune_string, "generic32") + || !strcmp (ix86_tune_string, "generic64"))) + ; else if (!strncmp (ix86_tune_string, "generic", 7)) - error ("bad value (%s) for -mtune= switch", ix86_tune_string); + error ("bad value (%s) for %stune=%s %s", + ix86_tune_string, prefix, suffix, sw); } else { @@ -2288,11 +2658,13 @@ override_options (void) else if (!strcmp (ix86_stringop_string, "unrolled_loop")) stringop_alg = unrolled_loop; else - error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string); + error ("bad value (%s) for %sstringop-strategy=%s %s", + ix86_stringop_string, prefix, suffix, sw); } if (!strcmp (ix86_tune_string, "x86-64")) - warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " - "-mtune=generic instead as appropriate."); + warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " + "%stune=k8%s or %stune=generic%s instead as appropriate.", + prefix, suffix, prefix, suffix, prefix, suffix); if (!ix86_arch_string) ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; @@ -2300,9 +2672,11 @@ override_options (void) ix86_arch_specified = 1; if (!strcmp (ix86_arch_string, "generic")) - error ("generic CPU can be used only for -mtune= switch"); + error ("generic CPU can be used only for %stune=%s %s", + prefix, suffix, sw); if (!strncmp (ix86_arch_string, "generic", 7)) - error ("bad value (%s) for -march= switch", ix86_arch_string); + error ("bad value (%s) for %sarch=%s %s", + ix86_arch_string, prefix, suffix, sw); if (ix86_cmodel_string != 0) { @@ -2319,7 +2693,8 @@ override_options (void) else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) ix86_cmodel = CM_KERNEL; else - error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); + error ("bad value (%s) for %scmodel=%s %s", + ix86_cmodel_string, prefix, suffix, sw); } else { @@ -2342,7 +2717,8 @@ override_options (void) else if (!strcmp (ix86_asm_string, "att")) ix86_asm_dialect = ASM_ATT; else - error ("bad value (%s) for -masm= switch", ix86_asm_string); + error ("bad value (%s) for %sasm=%s %s", + ix86_asm_string, prefix, suffix, sw); } if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) error ("code model %qs not supported in the %s bit mode", @@ -2395,31 +2771,37 @@ override_options (void) if (processor_alias_table[i].flags & PTA_SSE5 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) ix86_isa_flags |= OPTION_MASK_ISA_SSE5; - - if (processor_alias_table[i].flags & PTA_ABM) - x86_abm = true; - if (processor_alias_table[i].flags & PTA_CX16) - x86_cmpxchg16b = true; - if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) - x86_popcnt = true; + if (processor_alias_table[i].flags & PTA_ABM + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) + ix86_isa_flags |= OPTION_MASK_ISA_ABM; + if (processor_alias_table[i].flags & PTA_CX16 + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) + ix86_isa_flags |= OPTION_MASK_ISA_CX16; + if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; + if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) + ix86_isa_flags |= OPTION_MASK_ISA_SAHF; + if (processor_alias_table[i].flags & PTA_AES + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) + ix86_isa_flags |= OPTION_MASK_ISA_AES; + if (processor_alias_table[i].flags & PTA_PCLMUL + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) + ix86_isa_flags 
|= OPTION_MASK_ISA_PCLMUL; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; - if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))) - x86_sahf = true; - if (processor_alias_table[i].flags & PTA_AES) - x86_aes = true; - if (processor_alias_table[i].flags & PTA_PCLMUL) - x86_pclmul = true; break; } if (i == pta_size) - error ("bad value (%s) for -march= switch", ix86_arch_string); + error ("bad value (%s) for %sarch=%s %s", + ix86_arch_string, prefix, suffix, sw); ix86_arch_mask = 1u << ix86_arch; for (i = 0; i < X86_ARCH_LAST; ++i) - ix86_arch_features[i] &= ix86_arch_mask; + ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); for (i = 0; i < pta_size; i++) if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) @@ -2451,22 +2833,15 @@ override_options (void) break; } if (i == pta_size) - error ("bad value (%s) for -mtune= switch", ix86_tune_string); - - /* Enable SSE2 if AES or PCLMUL is enabled. */ - if ((x86_aes || x86_pclmul) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; - } + error ("bad value (%s) for %stune=%s %s", + ix86_tune_string, prefix, suffix, sw); ix86_tune_mask = 1u << ix86_tune; for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] &= ix86_tune_mask; + ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); if (optimize_size) - ix86_cost = &size_cost; + ix86_cost = &ix86_size_cost; else ix86_cost = processor_target_table[ix86_tune].cost; @@ -2477,10 +2852,11 @@ override_options (void) if (ix86_regparm_string) { if (TARGET_64BIT) - warning (0, "-mregparm is ignored in 64-bit mode"); + warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix); i = atoi (ix86_regparm_string); if (i < 0 || i > REGPARM_MAX) - error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); + error ("%sregparm=%d%s is not between 0 and %d", + prefix, i, suffix, REGPARM_MAX); else ix86_regparm = i; } @@ -2492,12 +2868,14 @@ override_options (void) Remove this code in GCC 3.2 or later. 
*/ if (ix86_align_loops_string) { - warning (0, "-malign-loops is obsolete, use -falign-loops"); + warning (0, "%salign-loops%s is obsolete, use %salign-loops%s", + prefix, suffix, prefix, suffix); if (align_loops == 0) { i = atoi (ix86_align_loops_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_loops = 1 << i; } @@ -2505,12 +2883,14 @@ override_options (void) if (ix86_align_jumps_string) { - warning (0, "-malign-jumps is obsolete, use -falign-jumps"); + warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s", + prefix, suffix, prefix, suffix); if (align_jumps == 0) { i = atoi (ix86_align_jumps_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_jumps = 1 << i; } @@ -2518,12 +2898,14 @@ override_options (void) if (ix86_align_funcs_string) { - warning (0, "-malign-functions is obsolete, use -falign-functions"); + warning (0, "%salign-functions%s is obsolete, use %salign-functions%s", + prefix, suffix, prefix, suffix); if (align_functions == 0) { i = atoi (ix86_align_funcs_string); if (i < 0 || i > MAX_CODE_ALIGN) - error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); + error ("%salign-loops=%d%s is not between 0 and %d", + prefix, i, suffix, MAX_CODE_ALIGN); else align_functions = 1 << i; } @@ -2551,7 +2933,7 @@ override_options (void) { i = atoi (ix86_branch_cost_string); if (i < 0 || i > 5) - error ("-mbranch-cost=%d is not between 0 and 5", i); + error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix); else ix86_branch_cost = i; } @@ -2559,7 +2941,7 @@ override_options (void) { i = atoi (ix86_section_threshold_string); if (i < 0) - error ("-mlarge-data-threshold=%d is negative", i); + error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix); else ix86_section_threshold = i; } @@ -2573,8 +2955,8 @@ override_options (void) else if (strcmp (ix86_tls_dialect_string, "sun") == 0) ix86_tls_dialect = TLS_DIALECT_SUN; else - error ("bad value (%s) for -mtls-dialect= switch", - ix86_tls_dialect_string); + error ("bad value (%s) for %stls-dialect=%s %s", + ix86_tls_dialect_string, prefix, suffix, sw); } if (ix87_precision_string) @@ -2597,7 +2979,7 @@ override_options (void) | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit); if (TARGET_RTD) - warning (0, "-mrtd is ignored in 64bit mode"); + warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); } else { @@ -2643,27 +3025,50 @@ override_options (void) /* Turn on popcnt instruction for -msse4.2 or -mabm. */ if (TARGET_SSE4_2 || TARGET_ABM) - x86_popcnt = true; + ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; - /* Validate -mpreferred-stack-boundary= value, or provide default. - The default of 128 bits is for Pentium III's SSE __m128. We can't - change it because of optimize_size. Otherwise, we can't mix object - files compiled with -Os and -On. */ - ix86_preferred_stack_boundary = 128; + /* Validate -mpreferred-stack-boundary= value or default it to + PREFERRED_STACK_BOUNDARY_DEFAULT. */ + ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; if (ix86_preferred_stack_boundary_string) { i = atoi (ix86_preferred_stack_boundary_string); if (i < (TARGET_64BIT ? 
4 : 2) || i > 12) - error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, - TARGET_64BIT ? 4 : 2); + error ("%spreferred-stack-boundary=%d%s is not between %d and 12", + prefix, i, suffix, TARGET_64BIT ? 4 : 2); else ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; } + /* Set the default value for -mstackrealign. */ + if (ix86_force_align_arg_pointer == -1) + ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; + + /* Validate -mincoming-stack-boundary= value or default it to + MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ + if (ix86_force_align_arg_pointer) + ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY; + else + ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; + ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; + if (ix86_incoming_stack_boundary_string) + { + i = atoi (ix86_incoming_stack_boundary_string); + if (i < (TARGET_64BIT ? 4 : 2) || i > 12) + error ("-mincoming-stack-boundary=%d is not between %d and 12", + i, TARGET_64BIT ? 4 : 2); + else + { + ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT; + ix86_incoming_stack_boundary + = ix86_user_incoming_stack_boundary; + } + } + /* Accept -msseregparm only if at least SSE support is enabled. */ if (TARGET_SSEREGPARM && ! TARGET_SSE) - error ("-msseregparm used without SSE enabled"); + error ("%ssseregparm%s used without SSE enabled", prefix, suffix); ix86_fpmath = TARGET_FPMATH_DEFAULT; if (ix86_fpmath_string != 0) @@ -2681,7 +3086,10 @@ override_options (void) ix86_fpmath = FPMATH_SSE; } else if (! strcmp (ix86_fpmath_string, "387,sse") - || ! strcmp (ix86_fpmath_string, "sse,387")) + || ! strcmp (ix86_fpmath_string, "387+sse") + || ! strcmp (ix86_fpmath_string, "sse,387") + || ! strcmp (ix86_fpmath_string, "sse+387") + || ! strcmp (ix86_fpmath_string, "both")) { if (!TARGET_SSE) { @@ -2697,7 +3105,8 @@ override_options (void) ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); } else - error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); + error ("bad value (%s) for %sfpmath=%s %s", + ix86_fpmath_string, prefix, suffix, sw); } /* If the i387 is disabled, then do not return values in it. 
*/ @@ -2713,7 +3122,8 @@ override_options (void) ix86_veclib_handler = ix86_veclibabi_acml; else error ("unknown vectorization library ABI type (%s) for " - "-mveclibabi= switch", ix86_veclibabi_string); + "%sveclibabi=%s %s", ix86_veclibabi_string, + prefix, suffix, sw); } if ((x86_accumulate_outgoing_args & ix86_tune_mask) @@ -2732,7 +3142,8 @@ override_options (void) { if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) warning (0, "unwind tables currently require either a frame pointer " - "or -maccumulate-outgoing-args for correctness"); + "or %saccumulate-outgoing-args%s for correctness", + prefix, suffix); target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } @@ -2743,8 +3154,8 @@ override_options (void) && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) { if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) - warning (0, "stack probing requires -maccumulate-outgoing-args " - "for correctness"); + warning (0, "stack probing requires %saccumulate-outgoing-args%s " + "for correctness", prefix, suffix); target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; } @@ -2791,6 +3202,7 @@ override_options (void) ix86_gen_sub3_carry = gen_subdi3_carry_rex64; ix86_gen_one_cmpl2 = gen_one_cmpldi2; ix86_gen_monitor = gen_sse3_monitor64; + ix86_gen_andsp = gen_anddi3; } else { @@ -2801,6 +3213,7 @@ override_options (void) ix86_gen_sub3_carry = gen_subsi3_carry; ix86_gen_one_cmpl2 = gen_one_cmplsi2; ix86_gen_monitor = gen_sse3_monitor; + ix86_gen_andsp = gen_andsi3; } #ifdef USE_IX86_CLD @@ -2808,7 +3221,499 @@ override_options (void) if (!TARGET_64BIT) target_flags |= MASK_CLD & ~target_flags_explicit; #endif + + /* Save the initial options in case the user does function specific options */ + if (main_args_p) + target_option_default_node = target_option_current_node + = build_target_option_node (); +} + +/* Save the current options */ + +static void +ix86_function_specific_save (struct cl_target_option *ptr) +{ + gcc_assert (IN_RANGE (ix86_arch, 0, 255)); + gcc_assert (IN_RANGE (ix86_tune, 0, 255)); + gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); + gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); + + ptr->arch = ix86_arch; + ptr->tune = ix86_tune; + ptr->fpmath = ix86_fpmath; + ptr->branch_cost = ix86_branch_cost; + ptr->tune_defaulted = ix86_tune_defaulted; + ptr->arch_specified = ix86_arch_specified; + ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; + ptr->target_flags_explicit = target_flags_explicit; +} + +/* Restore the current options */ + +static void +ix86_function_specific_restore (struct cl_target_option *ptr) +{ + enum processor_type old_tune = ix86_tune; + enum processor_type old_arch = ix86_arch; + unsigned int ix86_arch_mask, ix86_tune_mask; + int i; + + ix86_arch = ptr->arch; + ix86_tune = ptr->tune; + ix86_fpmath = ptr->fpmath; + ix86_branch_cost = ptr->branch_cost; + ix86_tune_defaulted = ptr->tune_defaulted; + ix86_arch_specified = ptr->arch_specified; + ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit; + target_flags_explicit = ptr->target_flags_explicit; + + /* Recreate the arch feature tests if the arch changed */ + if (old_arch != ix86_arch) + { + ix86_arch_mask = 1u << ix86_arch; + for (i = 0; i < X86_ARCH_LAST; ++i) + ix86_arch_features[i] + = !!(initial_ix86_arch_features[i] & ix86_arch_mask); + } + + /* Recreate the tune optimization tests */ + if (old_tune != ix86_tune) + { + ix86_tune_mask = 1u << ix86_tune; + for (i = 0; i < X86_TUNE_LAST; ++i) + ix86_tune_features[i] + = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + } +} + +/* Print the current 
options */ + +static void +ix86_function_specific_print (FILE *file, int indent, + struct cl_target_option *ptr) +{ + char *target_string + = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags, + NULL, NULL, NULL, false); + + fprintf (file, "%*sarch = %d (%s)\n", + indent, "", + ptr->arch, + ((ptr->arch < TARGET_CPU_DEFAULT_max) + ? cpu_names[ptr->arch] + : "<unknown>")); + + fprintf (file, "%*stune = %d (%s)\n", + indent, "", + ptr->tune, + ((ptr->tune < TARGET_CPU_DEFAULT_max) + ? cpu_names[ptr->tune] + : "<unknown>")); + + fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath, + (ptr->fpmath & FPMATH_387) ? ", 387" : "", + (ptr->fpmath & FPMATH_SSE) ? ", sse" : ""); + fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); + + if (target_string) + { + fprintf (file, "%*s%s\n", indent, "", target_string); + free (target_string); + } +} + + +/* Inner function to process the attribute((option(...))), take an argument and + set the current options from the argument. If we have a list, recursively go + over the list. */ + +static bool +ix86_valid_option_attribute_inner_p (tree args, char *p_strings[]) +{ + char *next_optstr; + bool ret = true; + +#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } +#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } +#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } +#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } + + enum ix86_opt_type + { + ix86_opt_unknown, + ix86_opt_yes, + ix86_opt_no, + ix86_opt_str, + ix86_opt_isa + }; + + static const struct + { + const char *string; + size_t len; + enum ix86_opt_type type; + int opt; + int mask; + } attrs[] = { + /* isa options */ + IX86_ATTR_ISA ("3dnow", OPT_m3dnow), + IX86_ATTR_ISA ("abm", OPT_mabm), + IX86_ATTR_ISA ("aes", OPT_maes), + IX86_ATTR_ISA ("mmx", OPT_mmmx), + IX86_ATTR_ISA ("pclmul", OPT_mpclmul), + IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), + IX86_ATTR_ISA ("sse", OPT_msse), + IX86_ATTR_ISA ("sse2", OPT_msse2), + IX86_ATTR_ISA ("sse3", OPT_msse3), + IX86_ATTR_ISA ("sse4", OPT_msse4), + IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), + IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), + IX86_ATTR_ISA ("sse4a", OPT_msse4a), + IX86_ATTR_ISA ("sse5", OPT_msse5), + IX86_ATTR_ISA ("ssse3", OPT_mssse3), + + /* string options */ + IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), + IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH), + IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), + + /* flag options */ + IX86_ATTR_YES ("cld", + OPT_mcld, + MASK_CLD), + + IX86_ATTR_NO ("fancy-math-387", + OPT_mfancy_math_387, + MASK_NO_FANCY_MATH_387), + + IX86_ATTR_NO ("fused-madd", + OPT_mfused_madd, + MASK_NO_FUSED_MADD), + + IX86_ATTR_YES ("ieee-fp", + OPT_mieee_fp, + MASK_IEEE_FP), + + IX86_ATTR_YES ("inline-all-stringops", + OPT_minline_all_stringops, + MASK_INLINE_ALL_STRINGOPS), + + IX86_ATTR_YES ("inline-stringops-dynamically", + OPT_minline_stringops_dynamically, + MASK_INLINE_STRINGOPS_DYNAMICALLY), + + IX86_ATTR_NO ("align-stringops", + OPT_mno_align_stringops, + MASK_NO_ALIGN_STRINGOPS), + + IX86_ATTR_YES ("recip", + OPT_mrecip, + MASK_RECIP), + + }; + + /* If this is a list, recurse to get the options. 
*/ + if (TREE_CODE (args) == TREE_LIST) + { + bool ret = true; + + for (; args; args = TREE_CHAIN (args)) + if (TREE_VALUE (args) + && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings)) + ret = false; + + return ret; + } + + else if (TREE_CODE (args) != STRING_CST) + gcc_unreachable (); + + /* Handle multiple arguments separated by commas. */ + next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); + + while (next_optstr && *next_optstr != '\0') + { + char *p = next_optstr; + char *orig_p = p; + char *comma = strchr (next_optstr, ','); + const char *opt_string; + size_t len, opt_len; + int opt; + bool opt_set_p; + char ch; + unsigned i; + enum ix86_opt_type type = ix86_opt_unknown; + int mask = 0; + + if (comma) + { + *comma = '\0'; + len = comma - next_optstr; + next_optstr = comma + 1; + } + else + { + len = strlen (p); + next_optstr = NULL; + } + + /* Recognize no-xxx. */ + if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') + { + opt_set_p = false; + p += 3; + len -= 3; + } + else + opt_set_p = true; + + /* Find the option. */ + ch = *p; + opt = N_OPTS; + for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++) + { + type = attrs[i].type; + opt_len = attrs[i].len; + if (ch == attrs[i].string[0] + && ((type != ix86_opt_str) ? len == opt_len : len > opt_len) + && memcmp (p, attrs[i].string, opt_len) == 0) + { + opt = attrs[i].opt; + mask = attrs[i].mask; + opt_string = attrs[i].string; + break; + } + } + + /* Process the option. */ + if (opt == N_OPTS) + { + error ("attribute(option(\"%s\")) is unknown", orig_p); + ret = false; + } + + else if (type == ix86_opt_isa) + ix86_handle_option (opt, p, opt_set_p); + + else if (type == ix86_opt_yes || type == ix86_opt_no) + { + if (type == ix86_opt_no) + opt_set_p = !opt_set_p; + + if (opt_set_p) + target_flags |= mask; + else + target_flags &= ~mask; + } + + else if (type == ix86_opt_str) + { + if (p_strings[opt]) + { + error ("option(\"%s\") was already specified", opt_string); + ret = false; + } + else + p_strings[opt] = xstrdup (p + opt_len); + } + + else + gcc_unreachable (); + } + + return ret; +} + +/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ + +tree +ix86_valid_option_attribute_tree (tree args) +{ + const char *orig_arch_string = ix86_arch_string; + const char *orig_tune_string = ix86_tune_string; + const char *orig_fpmath_string = ix86_fpmath_string; + int orig_tune_defaulted = ix86_tune_defaulted; + int orig_arch_specified = ix86_arch_specified; + char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL }; + tree t = NULL_TREE; + int i; + struct cl_target_option *def + = TREE_TARGET_OPTION (target_option_default_node); + + /* Process each of the options on the chain. */ + if (! ix86_valid_option_attribute_inner_p (args, option_strings)) + return NULL_TREE; + + /* If the changed options are different from the default, rerun override_options, + and then save the options away. The string options are are attribute options, + and will be undone when we copy the save structure. */ + if (ix86_isa_flags != def->ix86_isa_flags + || target_flags != def->target_flags + || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] + || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] + || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) + { + /* If we are using the default tune= or arch=, undo the string assigned, + and use the default. 
*/ + if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) + ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; + else if (!orig_arch_specified) + ix86_arch_string = NULL; + + if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) + ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; + else if (orig_tune_defaulted) + ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ + if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) + ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]; + else if (!TARGET_64BIT && TARGET_SSE) + ix86_fpmath_string = "sse,387"; + + /* Do any overrides, such as arch=xxx, or tune=xxx support. */ + override_options (false); + + /* Save the current options unless we are validating options for + #pragma. */ + t = build_target_option_node (); + + ix86_arch_string = orig_arch_string; + ix86_tune_string = orig_tune_string; + ix86_fpmath_string = orig_fpmath_string; + + /* Free up memory allocated to hold the strings */ + for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) + if (option_strings[i]) + free (option_strings[i]); + } + + return t; +} + +/* Hook to validate attribute((option("string"))). */ + +static bool +ix86_valid_option_attribute_p (tree fndecl, + tree ARG_UNUSED (name), + tree args, + int ARG_UNUSED (flags)) +{ + struct cl_target_option cur_opts; + bool ret = true; + tree new_opts; + + cl_target_option_save (&cur_opts); + new_opts = ix86_valid_option_attribute_tree (args); + if (!new_opts) + ret = false; + + else if (fndecl) + DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts; + + cl_target_option_restore (&cur_opts); + return ret; +} + + +/* Hook to determine if one function can safely inline another. */ + +static bool +ix86_can_inline_p (tree caller, tree callee) +{ + bool ret = false; + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + /* If callee has no option attributes, then it is ok to inline. */ + if (!callee_tree) + ret = true; + + /* If caller has no option attributes, but callee does then it is not ok to + inline. */ + else if (!caller_tree) + ret = false; + + else + { + struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); + struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); + + /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function + can inline a SSE2 function but a SSE2 function can't inline a SSE5 + function. */ + if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) + != callee_opts->ix86_isa_flags) + ret = false; + + /* See if we have the same non-isa options. */ + else if (caller_opts->target_flags != callee_opts->target_flags) + ret = false; + + /* See if arch, tune, etc. are the same. */ + else if (caller_opts->arch != callee_opts->arch) + ret = false; + + else if (caller_opts->tune != callee_opts->tune) + ret = false; + + else if (caller_opts->fpmath != callee_opts->fpmath) + ret = false; + + else if (caller_opts->branch_cost != callee_opts->branch_cost) + ret = false; + + else + ret = true; + } + + return ret; } + + +/* Remember the last target of ix86_set_current_function. */ +static GTY(()) tree ix86_previous_fndecl; + +/* Establish appropriate back-end context for processing the function + FNDECL. The argument might be NULL to indicate processing at top + level, outside of any function scope. */ +static void +ix86_set_current_function (tree fndecl) +{ + /* Only change the context if the function changes. 
This hook is called + several times in the course of compiling a function, and we don't want to + slow things down too much or call target_reinit when it isn't safe. */ + if (fndecl && fndecl != ix86_previous_fndecl) + { + tree old_tree = (ix86_previous_fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) + : NULL_TREE); + + tree new_tree = (fndecl + ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) + : NULL_TREE); + + ix86_previous_fndecl = fndecl; + if (old_tree == new_tree) + ; + + else if (new_tree) + { + cl_target_option_restore (TREE_TARGET_OPTION (new_tree)); + target_reinit (); + } + + else if (old_tree) + { + struct cl_target_option *def + = TREE_TARGET_OPTION (target_option_current_node); + + cl_target_option_restore (def); + target_reinit (); + } + } +} + /* Return true if this goes in large data/bss. */ @@ -3134,11 +4039,6 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3) return false; - /* If we forced aligned the stack, then sibcalling would unalign the - stack, which may break the called function. */ - if (cfun->machine->force_align_arg_pointer) - return false; - /* Otherwise okay. That also includes certain types of indirect calls. */ return true; } @@ -3189,15 +4089,6 @@ ix86_handle_cconv_attribute (tree *node, tree name, *no_add_attrs = true; } - if (!TARGET_64BIT - && lookup_attribute (ix86_force_align_arg_pointer_string, - TYPE_ATTRIBUTES (*node)) - && compare_tree_int (cst, REGPARM_MAX-1)) - { - error ("%s functions limited to %d register parameters", - ix86_force_align_arg_pointer_string, REGPARM_MAX-1); - } - return NULL_TREE; } @@ -3341,7 +4232,7 @@ ix86_function_regparm (const_tree type, const_tree decl) /* Use register calling convention for local functions when possible. */ if (decl && TREE_CODE (decl) == FUNCTION_DECL - && flag_unit_at_a_time && !profile_flag) + && !profile_flag) { /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); @@ -3359,8 +4250,7 @@ ix86_function_regparm (const_tree type, const_tree decl) /* We can't use regparm(3) for nested functions as these use static chain pointer in third argument. */ if (local_regparm == 3 - && (decl_function_context (decl) - || ix86_force_align_arg_pointer) + && decl_function_context (decl) && !DECL_NO_STATIC_CHAIN (decl)) local_regparm = 2; @@ -3369,13 +4259,11 @@ ix86_function_regparm (const_tree type, const_tree decl) the callee DECL_STRUCT_FUNCTION is gone, so we fall back to scanning the attributes for the self-realigning property. */ f = DECL_STRUCT_FUNCTION (decl); - if (local_regparm == 3 - && (f ? !!f->machine->force_align_arg_pointer - : !!lookup_attribute (ix86_force_align_arg_pointer_string, - TYPE_ATTRIBUTES (TREE_TYPE (decl))))) - local_regparm = 2; + /* Since current internal arg pointer won't conflict with + parameter passing regs, so no need to change stack + realignment and adjust regparm number. - /* Each fixed register usage increases register pressure, + Each fixed register usage increases register pressure, so less registers should be used for argument passing. This functionality can be overriden by an explicit regparm value. */ @@ -3428,7 +4316,7 @@ ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) /* For local functions, pass up to SSE_REGPARM_MAX SFmode (and DFmode for SSE2) arguments in SSE registers. 
*/ - if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag) + if (decl && TARGET_SSE_MATH && !profile_flag) { /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); @@ -5286,14 +6174,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) /* Indicate to allocate space on the stack for varargs save area. */ ix86_save_varrargs_registers = 1; - /* We need 16-byte stack alignment to save SSE registers. If user - asked for lower preferred_stack_boundary, lets just hope that he knows - what he is doing and won't varargs SSE values. - - We also may end up assuming that only 64bit values are stored in SSE - register let some floating point program work. */ - if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT) - crtl->stack_alignment_needed = BIGGEST_ALIGNMENT; save_area = frame_pointer_rtx; set = get_varargs_alias_set (); @@ -5459,8 +6339,8 @@ ix86_va_start (tree valist, rtx nextarg) if (cfun->va_list_gpr_size) { type = TREE_TYPE (gpr); - t = build2 (GIMPLE_MODIFY_STMT, type, gpr, - build_int_cst (type, n_gpr * 8)); + t = build2 (MODIFY_EXPR, type, + gpr, build_int_cst (type, n_gpr * 8)); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } @@ -5468,7 +6348,7 @@ ix86_va_start (tree valist, rtx nextarg) if (cfun->va_list_fpr_size) { type = TREE_TYPE (fpr); - t = build2 (GIMPLE_MODIFY_STMT, type, fpr, + t = build2 (MODIFY_EXPR, type, fpr, build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -5476,11 +6356,11 @@ ix86_va_start (tree valist, rtx nextarg) /* Find the overflow area. */ type = TREE_TYPE (ovf); - t = make_tree (type, virtual_incoming_args_rtx); + t = make_tree (type, crtl->args.internal_arg_pointer); if (words != 0) t = build2 (POINTER_PLUS_EXPR, type, t, size_int (words * UNITS_PER_WORD)); - t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t); + t = build2 (MODIFY_EXPR, type, ovf, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -5490,7 +6370,7 @@ ix86_va_start (tree valist, rtx nextarg) Prologue of the function save it right above stack frame. */ type = TREE_TYPE (sav); t = make_tree (type, frame_pointer_rtx); - t = build2 (GIMPLE_MODIFY_STMT, type, sav, t); + t = build2 (MODIFY_EXPR, type, sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } @@ -5499,7 +6379,8 @@ ix86_va_start (tree valist, rtx nextarg) /* Implement va_arg. */ static tree -ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) +ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) { static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; tree f_gpr, f_fpr, f_ovf, f_sav; @@ -5511,6 +6392,7 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) int indirect_p = 0; tree ptrtype; enum machine_mode nat_mode; + int arg_boundary; /* Only 64bit target needs something special. 
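For orientation, the four fields that ix86_va_start fills in above are the x86-64 psABI va_list; a reference sketch with the ABI's field names (the typedef name here is illustrative, not GCC's):

typedef struct {
  unsigned int gp_offset;     /* set to n_gpr * 8                           */
  unsigned int fp_offset;     /* set to n_fpr * 16 + 8 * X86_64_REGPARM_MAX */
  void *overflow_arg_area;    /* stack args; now seeded from
                                 crtl->args.internal_arg_pointer            */
  void *reg_save_area;        /* register save area stored by the prologue  */
} abi_va_list[1];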
*/ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) @@ -5628,16 +6510,14 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) /* int_addr = gpr + sav; */ t = fold_convert (sizetype, gpr); t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); - t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t); - gimplify_and_add (t, pre_p); + gimplify_assign (int_addr, t, pre_p); } if (needed_sseregs) { /* sse_addr = fpr + sav; */ t = fold_convert (sizetype, fpr); t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); - t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t); - gimplify_and_add (t, pre_p); + gimplify_assign (sse_addr, t, pre_p); } if (need_temp) { @@ -5646,8 +6526,7 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) /* addr = &temp; */ t = build1 (ADDR_EXPR, build_pointer_type (type), temp); - t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t); - gimplify_and_add (t, pre_p); + gimplify_assign (addr, t, pre_p); for (i = 0; i < XVECLEN (container, 0); i++) { @@ -5680,8 +6559,7 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) size_int (INTVAL (XEXP (slot, 1)))); dest = build_va_arg_indirect_ref (dest_addr); - t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src); - gimplify_and_add (t, pre_p); + gimplify_assign (dest, src, pre_p); } } @@ -5689,33 +6567,38 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) { t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); - t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t); - gimplify_and_add (t, pre_p); + gimplify_assign (gpr, t, pre_p); } + if (needed_sseregs) { t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); - t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t); - gimplify_and_add (t, pre_p); + gimplify_assign (fpr, t, pre_p); } - t = build1 (GOTO_EXPR, void_type_node, lab_over); - gimplify_and_add (t, pre_p); + gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); - t = build1 (LABEL_EXPR, void_type_node, lab_false); - append_to_statement_list (t, pre_p); + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); } /* ... otherwise out of the overflow area. */ + /* When we align parameter on stack for caller, if the parameter + alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be + aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee + here with caller. */ + arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type); + if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) + arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + /* Care for on-stack alignment if needed. 
*/ - if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64 + if (arg_boundary <= 64 || integer_zerop (TYPE_SIZE (type))) t = ovf; else { - HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; + HOST_WIDE_INT align = arg_boundary / 8; t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf, size_int (align - 1)); t = fold_convert (sizetype, t); @@ -5724,20 +6607,14 @@ ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) t = fold_convert (TREE_TYPE (ovf), t); } gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); - - t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t); - gimplify_and_add (t2, pre_p); + gimplify_assign (addr, t, pre_p); t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, size_int (rsize * UNITS_PER_WORD)); - t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t); - gimplify_and_add (t, pre_p); + gimplify_assign (unshare_expr (ovf), t, pre_p); if (container) - { - t = build1 (LABEL_EXPR, void_type_node, lab_over); - append_to_statement_list (t, pre_p); - } + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); ptrtype = build_pointer_type (type); addr = fold_convert (ptrtype, addr); @@ -5815,7 +6692,7 @@ standard_80387_constant_p (rtx x) /* For XFmode constants, try to find a special 80387 instruction when optimizing for size or on those CPUs that benefit from them. */ if (mode == XFmode - && (optimize_size || TARGET_EXT_80387_CONSTANTS)) + && (optimize_insn_for_size_p () || TARGET_EXT_80387_CONSTANTS)) { int i; @@ -6236,9 +7113,14 @@ ix86_select_alt_pic_regnum (void) if (current_function_is_leaf && !crtl->profile && !ix86_current_function_calls_tls_descriptor) { - int i; + int i, drap; + /* Can't use the same register for both PIC and DRAP. */ + if (crtl->drap_reg) + drap = REGNO (crtl->drap_reg); + else + drap = -1; for (i = 2; i >= 0; --i) - if (!df_regs_ever_live_p (i)) + if (i != drap && !df_regs_ever_live_p (i)) return i; } @@ -6274,8 +7156,8 @@ ix86_save_reg (unsigned int regno, int maybe_eh_return) } } - if (cfun->machine->force_align_arg_pointer - && regno == REGNO (cfun->machine->force_align_arg_pointer)) + if (crtl->drap_reg + && regno == REGNO (crtl->drap_reg)) return 1; return (df_regs_ever_live_p (regno) @@ -6298,6 +7180,24 @@ ix86_nsaved_regs (void) return nregs; } +/* Given FROM and TO register numbers, say whether this elimination is + allowed. If stack alignment is needed, we can only replace argument + pointer with hard frame pointer, or replace frame pointer with stack + pointer. Otherwise, frame pointer elimination is automatically + handled and all other eliminations are valid. */ + +int +ix86_can_eliminate (int from, int to) +{ + if (stack_realign_fp) + return ((from == ARG_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + || (from == FRAME_POINTER_REGNUM + && to == STACK_POINTER_REGNUM)); + else + return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1; +} + /* Return the offset between two registers, one to be eliminated, and the other its replacement, at the start of a routine. */ @@ -6341,6 +7241,10 @@ ix86_compute_frame_layout (struct ix86_frame *frame) stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; + gcc_assert (!size || stack_alignment_needed); + gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); + gcc_assert (preferred_alignment <= stack_alignment_needed); + /* During reload iteration the amount of registers saved can change. Recompute the value as needed. 
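Both the overflow-area code above and the frame-layout change below lean on the same power-of-two round-up idiom; in isolation it is just (a sketch, assuming align is a power of two):

/* e.g. align_up (20, 16) == 32 and align_up (32, 16) == 32.  */
static inline unsigned long
align_up (unsigned long x, unsigned long align)
{
  return (x + align - 1) & -align;
}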
     Do not recompute when amount of registers didn't change as reload
     does multiple calls to the function and does not
@@ -6383,18 +7287,10 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
   frame->hard_frame_pointer_offset = offset;

-  /* Do some sanity checking of stack_alignment_needed and
-     preferred_alignment, since i386 port is the only using those features
-     that may break easily.  */
-
-  gcc_assert (!size || stack_alignment_needed);
-  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
-  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
-  gcc_assert (stack_alignment_needed
-              <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
-
-  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
-    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
+  /* Align the offset, because the realigned frame starts from
+     here.  */
+  if (stack_realign_fp)
+    offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;

   /* Register save area */
   offset += frame->nregs * UNITS_PER_WORD;
@@ -6560,38 +7456,131 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
     RTX_FRAME_RELATED_P (insn) = 1;
 }

-/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
+/* Find an available register to be used as the dynamic realign
+   argument pointer (DRAP) register.  Such a register will be written
+   in the prologue and used at the beginning of the function body, so
+   it must not be
+     1. a parameter-passing register.
+     2. the GOT pointer.
+   We reuse the static-chain register if it is available.  Otherwise,
+   we use DI for i386 and R13 for x86-64.  We chose R13 since it has
+   a shorter encoding.
+
+   Return: the regno of the chosen register.  */
+
+static unsigned int
+find_drap_reg (void)
+{
+  tree decl = cfun->decl;
+
+  if (TARGET_64BIT)
+    {
+      /* Use R13 for a nested function or a function that needs a
+         static chain.  Since a function with a tail call may use any
+         caller-saved register in the epilogue, DRAP must not use a
+         caller-saved register in such a case.  */
+      if ((decl_function_context (decl)
+           && !DECL_NO_STATIC_CHAIN (decl))
+          || crtl->tail_call_emit)
+        return R13_REG;
+
+      return R10_REG;
+    }
+  else
+    {
+      /* Use DI for a nested function or a function that needs a
+         static chain.  Since a function with a tail call may use any
+         caller-saved register in the epilogue, DRAP must not use a
+         caller-saved register in such a case.  */
+      if ((decl_function_context (decl)
+           && !DECL_NO_STATIC_CHAIN (decl))
+          || crtl->tail_call_emit)
+        return DI_REG;
+
+      /* Reuse the static chain register if it isn't used for parameter
+         passing.  */
+      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
+          && !lookup_attribute ("fastcall",
+                                TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+        return CX_REG;
+      else
+        return DI_REG;
+    }
+}
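Summarizing find_drap_reg as written above (an informal reference derived from the code, assuming default regparm settings):

/* 64-bit: nested / needs static chain / makes tail calls -> %r13
           otherwise                                      -> %r10
   32-bit: nested / needs static chain / makes tail calls -> %edi
           regparm <= 2 and not fastcall                  -> %ecx
           otherwise                                      -> %edi  */

%r10 and %ecx are the respective static-chain registers being reused when they are free.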
+
+/* Update incoming stack boundary and estimated stack alignment.  */
+
+static void
+ix86_update_stack_boundary (void)
+{
+  /* Prefer the one specified at command line.  */
+  ix86_incoming_stack_boundary
+    = (ix86_user_incoming_stack_boundary
+       ? ix86_user_incoming_stack_boundary
+       : ix86_default_incoming_stack_boundary);
+
+  /* Incoming stack alignment can be changed on individual functions
+     via the force_align_arg_pointer attribute.  We use the smallest
+     incoming stack boundary.  */
+  if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
+      && lookup_attribute (ix86_force_align_arg_pointer_string,
+                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
+    ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+
+  /* The stack at the entrance of main is aligned by the runtime.  We
+     use the smallest incoming stack boundary.  */
+  if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
+      && DECL_NAME (current_function_decl)
+      && MAIN_NAME_P (DECL_NAME (current_function_decl))
+      && DECL_FILE_SCOPE_P (current_function_decl))
+    ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+
+  /* x86_64 varargs needs 16-byte stack alignment for the register
+     save area.  */
+  if (TARGET_64BIT
+      && cfun->stdarg
+      && crtl->stack_alignment_estimated < 128)
+    crtl->stack_alignment_estimated = 128;
+}
+
+/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
+   needed, or an rtx for the DRAP otherwise.  */

 static rtx
-ix86_internal_arg_pointer (void)
+ix86_get_drap_rtx (void)
 {
-  bool has_force_align_arg_pointer =
-    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
-                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
-  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
-       && DECL_NAME (current_function_decl)
-       && MAIN_NAME_P (DECL_NAME (current_function_decl))
-       && DECL_FILE_SCOPE_P (current_function_decl))
-      || ix86_force_align_arg_pointer
-      || has_force_align_arg_pointer)
-    {
-      /* Nested functions can't realign the stack due to a register
-         conflict.  */
-      if (DECL_CONTEXT (current_function_decl)
-          && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
-        {
-          if (ix86_force_align_arg_pointer)
-            warning (0, "-mstackrealign ignored for nested functions");
-          if (has_force_align_arg_pointer)
-            error ("%s not supported for nested functions",
-                   ix86_force_align_arg_pointer_string);
-          return virtual_incoming_args_rtx;
-        }
-      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
-      return copy_to_reg (cfun->machine->force_align_arg_pointer);
+  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
+    crtl->need_drap = true;
+
+  if (stack_realign_drap)
+    {
+      /* Assign DRAP to vDRAP and return vDRAP.  */
+      unsigned int regno = find_drap_reg ();
+      rtx drap_vreg;
+      rtx arg_ptr;
+      rtx seq, insn;
+
+      arg_ptr = gen_rtx_REG (Pmode, regno);
+      crtl->drap_reg = arg_ptr;
+
+      start_sequence ();
+      drap_vreg = copy_to_reg (arg_ptr);
+      seq = get_insns ();
+      end_sequence ();
+
+      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      return drap_vreg;
     }
   else
-    return virtual_incoming_args_rtx;
+    return NULL;
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+  return virtual_incoming_args_rtx;
 }

 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
@@ -6618,6 +7607,34 @@ ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
     }
 }

+/* Finalize the stack_realign_needed flag, which guides the prologue
+   and epilogue so that they are generated in the correct form.  */
+static void
+ix86_finalize_stack_realign_flags (void)
+{
+  /* Check whether stack realignment is really needed after reload,
+     and store the result in cfun.  */
+  unsigned int incoming_stack_boundary
+    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+  unsigned int stack_realign = (incoming_stack_boundary
+                                < (current_function_is_leaf
+                                   ? crtl->max_used_stack_slot_alignment
+                                   : crtl->stack_alignment_needed));
+
+  if (crtl->stack_realign_finalized)
+    {
+      /* After stack_realign_needed is finalized, we can no longer
+         change it.  */
+      gcc_assert (crtl->stack_realign_needed == stack_realign);
+    }
+  else
+    {
+      crtl->stack_realign_needed = stack_realign;
+      crtl->stack_realign_finalized = true;
+    }
+}
+
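Restating the finalize decision above as a self-contained predicate (a sketch with shortened names, not GCC code):

static bool
stack_realign_needed_p (unsigned int parm_boundary,
                        unsigned int incoming_boundary,
                        bool leaf_p,
                        unsigned int max_used_slot_align,
                        unsigned int alignment_needed)
{
  unsigned int in = (parm_boundary > incoming_boundary
                     ? parm_boundary : incoming_boundary);
  return in < (leaf_p ? max_used_slot_align : alignment_needed);
}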
 /* Expand the prologue into a bunch of separate insns.  */

 void
@@ -6628,52 +7645,56 @@ ix86_expand_prologue (void)
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;

+  ix86_finalize_stack_realign_flags ();
+
+  /* DRAP should not coexist with stack_realign_fp.  */
+  gcc_assert (!(crtl->drap_reg && stack_realign_fp));
+
   ix86_compute_frame_layout (&frame);

-  if (cfun->machine->force_align_arg_pointer)
+  /* Emit prologue code to adjust stack alignment and set up DRAP, in
+     case DRAP is needed and stack realignment really is needed after
+     reload.  */
+  if (crtl->drap_reg && crtl->stack_realign_needed)
     {
       rtx x, y;
+      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+      int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
+                              ? 0 : UNITS_PER_WORD);
+
+      gcc_assert (stack_realign_drap);

       /* Grab the argument pointer.  */
-      x = plus_constant (stack_pointer_rtx, 4);
-      y = cfun->machine->force_align_arg_pointer;
-      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
-      RTX_FRAME_RELATED_P (insn) = 1;
+      x = plus_constant (stack_pointer_rtx,
+                         (UNITS_PER_WORD + param_ptr_offset));
+      y = crtl->drap_reg;

-      /* The unwind info consists of two parts: install the fafp as the cfa,
-         and record the fafp as the "save register" of the stack pointer.
-         The later is there in order that the unwinder can see where it
-         should restore the stack pointer across the and insn.  */
-      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
-      x = gen_rtx_SET (VOIDmode, y, x);
-      RTX_FRAME_RELATED_P (x) = 1;
-      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
-                          UNSPEC_REG_SAVE);
-      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
-      RTX_FRAME_RELATED_P (y) = 1;
-      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
-      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
-      REG_NOTES (insn) = x;
+      /* Only push the parameter pointer reg if it is a caller-saved
+         reg.  */
+      if (!call_used_regs[REGNO (crtl->drap_reg)])
+        {
+          /* Push the arg pointer reg.  */
+          insn = emit_insn (gen_push (y));
+          RTX_FRAME_RELATED_P (insn) = 1;
+        }
+
+      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
+      RTX_FRAME_RELATED_P (insn) = 1;

       /* Align the stack.  */
-      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                             GEN_INT (-16)));
-
-      /* And here we cheat like madmen with the unwind info.  We force the
-         cfa register back to sp+4, which is exactly what it was at the
-         start of the function.  Re-pushing the return address results in
-         the return at the same spot relative to the cfa, and thus is
-         correct wrt the unwind info.  */
-      x = cfun->machine->force_align_arg_pointer;
-      x = gen_frame_mem (Pmode, plus_constant (x, -4));
-      insn = emit_insn (gen_push (x));
+      insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
+                                           stack_pointer_rtx,
+                                           GEN_INT (-align_bytes)));
       RTX_FRAME_RELATED_P (insn) = 1;

-      x = GEN_INT (4);
-      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
-      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
-      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
-      REG_NOTES (insn) = x;
+      /* Replicate the return address on the stack so that the return
+         address can be reached via the (argp - 1) slot.  This is needed
+         to implement macro RETURN_ADDR_RTX and intrinsic function
+         expand_builtin_return_addr etc.  */
+      x = crtl->drap_reg;
+      x = gen_frame_mem (Pmode,
+                         plus_constant (x, -UNITS_PER_WORD));
+      insn = emit_insn (gen_push (x));
+      RTX_FRAME_RELATED_P (insn) = 1;
     }

   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
@@ -6688,6 +7709,18 @@ ix86_expand_prologue (void)
       RTX_FRAME_RELATED_P (insn) = 1;
     }

+  if (stack_realign_fp)
+    {
+      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+
+      /* Align the stack.  */
+      insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
+                                           stack_pointer_rtx,
+                                           GEN_INT (-align_bytes)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+
   allocate = frame.to_allocate;

   if (!frame.save_regs_using_mov)
@@ -6702,7 +7735,9 @@
      a red zone location */
   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
-    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
+    ix86_emit_save_regs_using_mov ((frame_pointer_needed
+                                    && !crtl->stack_realign_needed)
+                                   ? hard_frame_pointer_rtx
                                    : stack_pointer_rtx,
                                    -frame.nregs * UNITS_PER_WORD);

@@ -6761,8 +7796,11 @@
           && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
                && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
         {
-          if (!frame_pointer_needed || !frame.to_allocate)
-            ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+          if (!frame_pointer_needed
+              || !frame.to_allocate
+              || crtl->stack_realign_needed)
+            ix86_emit_save_regs_using_mov (stack_pointer_rtx,
+                                           frame.to_allocate);
           else
             ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                            -frame.nregs * UNITS_PER_WORD);
@@ -6813,6 +7851,16 @@
       emit_insn (gen_blockage ());
     }

+  if (crtl->drap_reg && !crtl->stack_realign_needed)
+    {
+      /* vDRAP is set up, but after reload it turns out that stack
+         realignment isn't necessary; emit the prologue to set up DRAP
+         without the stack realignment adjustment.  */
+      int drap_bp_offset = UNITS_PER_WORD * 2;
+      rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
+      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
+    }
+
   /* Emit cld instruction if stringops are used in the function.  */
   if (TARGET_CLD && ix86_current_function_needs_cld)
     emit_insn (gen_cld ());
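For 32-bit code with a 16-byte requirement and a call-clobbered DRAP register (%ecx, per find_drap_reg), the DRAP prologue path above amounts to roughly this instruction sequence (a sketch, not literal compiler output):

/* 32-bit, align_bytes = 16, call-clobbered DRAP:
	leal	4(%esp), %ecx	# vDRAP <- incoming argument pointer
	andl	$-16, %esp	# align the stack
	pushl	-4(%ecx)	# replicate return address at (argp - 1)  */

A callee-saved DRAP register would additionally be pushed first, which is why param_ptr_offset enters the address arithmetic.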
@@ -6854,10 +7902,17 @@ void
 ix86_expand_epilogue (int style)
 {
   int regno;
-  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
+  int sp_valid;
   struct ix86_frame frame;
   HOST_WIDE_INT offset;

+  ix86_finalize_stack_realign_flags ();
+
+  /* When the stack is realigned, SP must be valid.  */
+  sp_valid = (!frame_pointer_needed
+              || current_function_sp_is_unchanging
+              || stack_realign_fp);
+
   ix86_compute_frame_layout (&frame);

   /* Calculate start of saved registers relative to ebp.  Special care
@@ -6891,11 +7946,16 @@ ix86_expand_epilogue (int style)
     {
       /* Restore registers.  We can use ebp or esp to address the memory
          locations.  If both are available, default to ebp, since offsets
-         are known to be small.  Only exception is esp pointing directly to the
-         end of block of saved registers, where we may simplify addressing
-         mode.  */
+         are known to be small.  Only exception is esp pointing directly
+         to the end of block of saved registers, where we may simplify
+         addressing mode.

-  if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
+         If we are realigning the stack with both bp and sp, the register
+         restores can't be addressed via bp; sp must be used instead.  */
+
+  if (!frame_pointer_needed
+      || (sp_valid && !frame.to_allocate)
+      || stack_realign_fp)
     ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate,
                                       style == 2);
   else
@@ -6907,6 +7967,9 @@
     {
       rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

+      /* Stack realignment doesn't work with eh_return.  */
+      gcc_assert (!crtl->stack_realign_needed);
+
       if (frame_pointer_needed)
         {
           tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
@@ -6948,10 +8011,16 @@
   else
     {
       /* First step is to deallocate the stack frame so that we can
-         pop the registers.  */
+         pop the registers.
+
+         If we realign the stack with the frame pointer, then the stack
+         pointer can't be recovered via lea $offset(%bp), %sp, because
+         there is a padding area between bp and sp for the realignment.
+         "add $to_allocate, %sp" must be used instead.  */
       if (!sp_valid)
         {
           gcc_assert (frame_pointer_needed);
+          gcc_assert (!stack_realign_fp);
           pro_epilogue_adjust_stack (stack_pointer_rtx,
                                      hard_frame_pointer_rtx,
                                      GEN_INT (offset), style);
@@ -6970,15 +8039,31 @@
           if (TARGET_USE_LEAVE)
             emit_insn ((*ix86_gen_leave) ());
           else
-            emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+            {
+              /* When stack realignment really happens, recovering the
+                 stack pointer from the hard frame pointer is a must,
+                 if not using leave.  */
+              if (stack_realign_fp)
+                pro_epilogue_adjust_stack (stack_pointer_rtx,
+                                           hard_frame_pointer_rtx,
+                                           const0_rtx, style);
+              emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+            }
        }
     }

-  if (cfun->machine->force_align_arg_pointer)
+  if (crtl->drap_reg && crtl->stack_realign_needed)
     {
-      emit_insn (gen_addsi3 (stack_pointer_rtx,
-                             cfun->machine->force_align_arg_pointer,
-                             GEN_INT (-4)));
+      int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
+                              ? 0 : UNITS_PER_WORD);
+      gcc_assert (stack_realign_drap);
+      emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
+                                   crtl->drap_reg,
+                                   GEN_INT (-(UNITS_PER_WORD
+                                              + param_ptr_offset))));
+      if (!call_used_regs[REGNO (crtl->drap_reg)])
+        emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
+    }

   /* Sibcall epilogues don't want a return instruction.  */
@@ -8245,7 +9330,8 @@ get_dllimport_decl (tree decl)
   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
   name = targetm.strip_name_encoding (name);
-  prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
+  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
+           ? "*__imp_" : "*__imp__";
   namelen = strlen (name);
   prefixlen = strlen (prefix);
   imp_name = (char *) alloca (namelen + prefixlen + 1);
@@ -10455,7 +11541,7 @@ ix86_expand_clear (rtx dest)
   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
-  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
+  if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
     {
       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
@@ -10722,7 +11808,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   if (MEM_P (op1))
     {
       /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_size)
+      if (optimize_insn_for_size_p ())
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
@@ -10804,7 +11890,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   else if (MEM_P (op0))
     {
       /* If we're optimizing for size, movups is the smallest.
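The get_dllimport_decl hunk above chooses the import-symbol prefix; spelled out as a hedged reference (derived from the condition, not additional code):

/* Targets with an empty user_label_prefix, and fastcall symbols
   (which already carry the '@' prefix), take the single-underscore
   form:
     foo -> *__imp_foo
   while underscore-prefixed targets re-add the user underscore:
     foo -> *__imp__foo   (i.e. "__imp_" + "_foo")  */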
*/ - if (optimize_size) + if (optimize_insn_for_size_p ()) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); @@ -12008,7 +13094,7 @@ ix86_fp_comparison_sahf_cost (enum rtx_code code) enum rtx_code bypass_code, first_code, second_code; /* Return arbitrarily high cost when instruction is not preferred - this avoids gcc from using it. */ - if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))) + if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))) return 1024; ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; @@ -12493,7 +13579,7 @@ ix86_expand_branch (enum rtx_code code, rtx label) optimizing for size. */ if ((code == EQ || code == NE) - && (!optimize_size + && (!optimize_insn_for_size_p () || hi[1] == const0_rtx || lo[1] == const0_rtx)) { rtx xor0, xor1; @@ -14545,7 +15631,7 @@ ix86_split_long_move (rtx operands[]) } /* If optimizing for size, attempt to locally unCSE nonzero constants. */ - if (optimize_size) + if (optimize_insn_for_size_p ()) { for (j = 0; j < nparts - 1; j++) if (CONST_INT_P (operands[6 + j]) @@ -14576,7 +15662,7 @@ ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) ? gen_addsi3 : gen_adddi3) (operand, operand, operand)); } - else if (!optimize_size + else if (!optimize_insn_for_size_p () && count * ix86_cost->add <= ix86_cost->shift_const) { int i; @@ -14659,7 +15745,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) { rtx x; - if (TARGET_PARTIAL_REG_STALL && !optimize_size) + if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); else x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); @@ -14691,7 +15777,7 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) /* For -1 << N, we can avoid the shld instruction, because we know that we're shifting 0...31/63 ones into a -1. */ emit_move_insn (low[0], constm1_rtx); - if (optimize_size) + if (optimize_insn_for_size_p ()) emit_move_insn (high[0], low[0]); else emit_move_insn (high[0], constm1_rtx); @@ -14714,10 +15800,13 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) ix86_expand_clear (scratch); emit_insn ((mode == DImode ? gen_x86_shift_adj_1 - : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch)); + : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2], + scratch)); } else - emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_2 + : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2])); } void @@ -14791,11 +15880,13 @@ ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) GEN_INT (single_width - 1))); emit_insn ((mode == DImode ? gen_x86_shift_adj_1 - : gen_x86_64_shift_adj) (low[0], high[0], operands[2], - scratch)); + : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], + scratch)); } else - emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_3 + : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2])); } } @@ -14854,11 +15945,13 @@ ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) ix86_expand_clear (scratch); emit_insn ((mode == DImode ? 
gen_x86_shift_adj_1 - : gen_x86_64_shift_adj) (low[0], high[0], operands[2], - scratch)); + : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], + scratch)); } else - emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); + emit_insn ((mode == DImode + ? gen_x86_shift_adj_2 + : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2])); } } @@ -15537,16 +16630,19 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, || (alg != rep_prefix_1_byte \ && alg != rep_prefix_4_byte \ && alg != rep_prefix_8_byte)) + const struct processor_costs *cost; + + cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost; *dynamic_check = -1; if (memset) - algs = &ix86_cost->memset[TARGET_64BIT != 0]; + algs = &cost->memset[TARGET_64BIT != 0]; else - algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; + algs = &cost->memcpy[TARGET_64BIT != 0]; if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg)) return stringop_alg; /* rep; movq or rep; movl is the smallest variant. */ - else if (optimize_size) + else if (optimize_insn_for_size_p ()) { if (!count || (count & 3)) return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte; @@ -16461,7 +17557,7 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 && !TARGET_INLINE_ALL_STRINGOPS - && !optimize_size + && !optimize_insn_for_size_p () && (!CONST_INT_P (align) || INTVAL (align) < 4)) return 0; @@ -16469,7 +17565,7 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) scratch1 = gen_reg_rtx (Pmode); if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 - && !optimize_size) + && !optimize_insn_for_size_p ()) { /* Well it seems that some optimizer does not combine a call like foo(strlen(bar), strlen(bar)); @@ -17633,6 +18729,8 @@ enum ix86_builtins IX86_BUILTIN_MOVNTPD, IX86_BUILTIN_MOVNTDQ, + IX86_BUILTIN_MOVQ128, + /* SSE2 MMX */ IX86_BUILTIN_MASKMOVDQU, IX86_BUILTIN_MOVMSKPD, @@ -18113,22 +19211,29 @@ enum ix86_builtins /* Table for the ix86 builtin decls. */ static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; -/* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so, - * if the target_flags include one of MASK. Stores the function decl - * in the ix86_builtins array. - * Returns the function decl or NULL_TREE, if the builtin was not added. */ +/* Table to record which ISA options the builtin needs. */ +static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; + +/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK + * of which isa_flags to use in the ix86_builtins_isa array. Stores the + * function decl in the ix86_builtins array. Returns the function decl or + * NULL_TREE, if the builtin was not added. + * + * Record all builtins, even if it isn't an instruction set in the current ISA + * in case the user uses function specific options for a different ISA. When + * the builtin is expanded, check at that time whether it is valid. 
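A hedged example of the scenario this prepares for (the attribute spelling is the one introduced by this series, and __builtin_ia32_aesenc128 is defined further below):

#include <emmintrin.h>

/* In a TU compiled with plain -msse2 the AES builtin must still
   exist, so that this per-function override can expand it; validity
   is now diagnosed at expansion time instead of definition time.  */
__attribute__((option ("aes")))
static __m128i
do_aesenc (__m128i state, __m128i key)
{
  return (__m128i) __builtin_ia32_aesenc128 ((__v2di) state,
                                             (__v2di) key);
}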
*/ static inline tree def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) { tree decl = NULL_TREE; - if (mask & ix86_isa_flags - && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)) + if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) { decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); ix86_builtins[(int) code] = decl; + ix86_builtins_isa[(int) code] = mask; } return decl; @@ -18755,6 +19860,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, + /* SSE2 MMX */ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, @@ -19171,9 +20278,10 @@ static const struct builtin_description bdesc_multi_arg[] = { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, }; -/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX - is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX - builtins. */ +/* Set up all the MMX/SSE builtins, even builtins for instructions that are not + in the current target ISA to allow the user to compile particular modules + with different target specific options that differ from the command line + options. */ static void ix86_init_mmx_sse_builtins (void) { @@ -20112,23 +21220,15 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); /* AES */ - if (TARGET_AES) - { - /* Define AES built-in functions only if AES is enabled. 
*/ - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); - } + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); /* PCLMUL */ - if (TARGET_PCLMUL) - { - /* Define PCLMUL built-in function only if PCLMUL is enabled. */ - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); - } + def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); /* Access to the vec_init patterns. */ ftype = build_function_type_list (V2SI_type_node, integer_type_node, @@ -20383,8 +21483,7 @@ ix86_init_builtins (void) ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; TREE_READONLY (decl) = 1; - if (TARGET_MMX) - ix86_init_mmx_sse_builtins (); + ix86_init_mmx_sse_builtins (); if (TARGET_64BIT) ix86_init_builtins_va_builtins_abi (); } @@ -21339,7 +22438,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, bool last_arg_constant = false; const struct insn_data *insn_p = &insn_data[icode]; enum machine_mode tmode = insn_p->operand[0].mode; - enum { load, store } class; + enum { load, store } klass; switch ((enum ix86_special_builtin_type) d->flag) { @@ -21351,7 +22450,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case V4SF_FTYPE_PCFLOAT: case V2DF_FTYPE_PCDOUBLE: nargs = 1; - class = load; + klass = load; memory = 0; break; case VOID_FTYPE_PV2SF_V4SF: @@ -21362,14 +22461,14 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case VOID_FTYPE_PDI_DI: case VOID_FTYPE_PINT_INT: nargs = 1; - class = store; + klass = store; /* Reserve memory operand for target. */ memory = ARRAY_SIZE (args); break; case V4SF_FTYPE_V4SF_PCV2SF: case V2DF_FTYPE_V2DF_PCDOUBLE: nargs = 2; - class = load; + klass = load; memory = 1; break; default: @@ -21378,7 +22477,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, gcc_assert (nargs <= ARRAY_SIZE (args)); - if (class == store) + if (klass == store) { arg = CALL_EXPR_ARG (exp, 0); op = expand_normal (arg); @@ -21455,7 +22554,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, if (! 
pat) return 0; emit_insn (pat); - return class == store ? 0 : target; + return klass == store ? 0 : target; } /* Return the integer constant in ARG. Constrain it to be in the range @@ -21600,6 +22699,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, enum machine_mode mode0, mode1, mode2; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + /* Determine whether the builtin function is available under the current ISA. + Originally the builtin was not created if it wasn't applicable to the + current ISA based on the command line switches. With function specific + options, we need to check in the context of the function making the call + whether it is supported. */ + if (ix86_builtins_isa[fcode] + && !(ix86_builtins_isa[fcode] & ix86_isa_flags)) + { + char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL, + NULL, NULL, false); + + if (!opts) + error ("%qE needs unknown isa option", fndecl); + else + { + gcc_assert (opts != NULL); + error ("%qE needs isa option %s", fndecl, opts); + free (opts); + } + return const0_rtx; + } + switch (fcode) { case IX86_BUILTIN_MASKMOVQ: @@ -22027,8 +23148,10 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) } -/* Returns a decl of a function that implements conversion of the - input vector of type TYPE, or NULL_TREE if it is not available. */ +/* Returns a decl of a function that implements conversion of an integer vector + into a floating-point vector, or vice-versa. TYPE is the type of the integer + side of the conversion. + Return NULL_TREE if it is not available. */ static tree ix86_vectorize_builtin_conversion (unsigned int code, tree type) @@ -22050,7 +23173,7 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type) case FIX_TRUNC_EXPR: switch (TYPE_MODE (type)) { - case V4SFmode: + case V4SImode: return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; default: return NULL_TREE; @@ -22068,7 +23191,7 @@ static tree ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool sqrt ATTRIBUTE_UNUSED) { - if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations)) return NULL_TREE; @@ -22300,16 +23423,16 @@ ix86_preferred_output_reload_class (rtx x, enum reg_class regclass) } static enum reg_class -ix86_secondary_reload (bool in_p, rtx x, enum reg_class class, +ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass, enum machine_mode mode, secondary_reload_info *sri ATTRIBUTE_UNUSED) { /* QImode spills from non-QI registers require intermediate register on 32bit targets. */ if (!in_p && mode == QImode && !TARGET_64BIT - && (class == GENERAL_REGS - || class == LEGACY_REGS - || class == INDEX_REGS)) + && (rclass == GENERAL_REGS + || rclass == LEGACY_REGS + || rclass == INDEX_REGS)) { int regno; @@ -23707,7 +24830,7 @@ ix86_pad_returns (void) bool replace = false; if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN - || !maybe_hot_bb_p (bb)) + || optimize_bb_for_size_p (bb)) continue; for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) if (active_insn_p (prev) || LABEL_P (prev)) @@ -24061,7 +25184,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, else tmp = new_target; - emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp, + emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, GEN_INT (1), GEN_INT (one_var == 1 ? 0 : 1), GEN_INT (one_var == 2 ? 
0+4 : 1+4), @@ -24625,7 +25748,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) /* target = X A B B */ ix86_expand_vector_set (false, target, val, 0); /* target = A X C D */ - emit_insn (gen_sse_shufps_1 (target, target, tmp, + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, GEN_INT (1), GEN_INT (0), GEN_INT (2+4), GEN_INT (3+4))); return; @@ -24636,7 +25759,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) /* tmp = X B C D */ ix86_expand_vector_set (false, tmp, val, 0); /* target = A B X D */ - emit_insn (gen_sse_shufps_1 (target, target, tmp, + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, GEN_INT (0), GEN_INT (1), GEN_INT (0+4), GEN_INT (3+4))); return; @@ -24647,7 +25770,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) /* tmp = X B C D */ ix86_expand_vector_set (false, tmp, val, 0); /* target = A B X D */ - emit_insn (gen_sse_shufps_1 (target, target, tmp, + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, GEN_INT (0), GEN_INT (1), GEN_INT (2+4), GEN_INT (0+4))); return; @@ -24768,7 +25891,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) case 1: case 3: tmp = gen_reg_rtx (mode); - emit_insn (gen_sse_shufps_1 (tmp, vec, vec, + emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, GEN_INT (elt), GEN_INT (elt), GEN_INT (elt+4), GEN_INT (elt+4))); break; @@ -24885,7 +26008,7 @@ ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) emit_insn (gen_sse_movhlps (tmp1, in, in)); emit_insn (fn (tmp2, tmp1, in)); - emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2, + emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, GEN_INT (1), GEN_INT (1), GEN_INT (1+4), GEN_INT (1+4))); emit_insn (fn (dest, tmp2, tmp3)); @@ -25020,7 +26143,7 @@ ix86_emit_fp_unordered_jump (rtx label) emit_insn (gen_x86_fnstsw_1 (reg)); - if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)) + if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) { emit_insn (gen_x86_sahf_1 (reg)); @@ -26378,6 +27501,10 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference #undef TARGET_INTERNAL_ARG_POINTER #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer +#undef TARGET_UPDATE_STACK_BOUNDARY +#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary +#undef TARGET_GET_DRAP_RTX +#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec #undef TARGET_STRICT_ARGUMENT_NAMING @@ -26420,6 +27547,30 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function + +#undef TARGET_OPTION_VALID_ATTRIBUTE_P +#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p + +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE ix86_function_specific_save + +#undef TARGET_OPTION_RESTORE +#define TARGET_OPTION_RESTORE ix86_function_specific_restore + +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT ix86_function_specific_print + +#undef TARGET_OPTION_CAN_INLINE_P +#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p + +#undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION +#define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true + +#undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION +#define 
TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d17e414eb5f..23871561544 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -49,6 +49,13 @@ along with GCC; see the file COPYING3. If not see #define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_SSE5 OPTION_ISA_SSE5 #define TARGET_ROUND OPTION_ISA_ROUND +#define TARGET_ABM OPTION_ISA_ABM +#define TARGET_POPCNT OPTION_ISA_POPCNT +#define TARGET_SAHF OPTION_ISA_SAHF +#define TARGET_AES OPTION_ISA_AES +#define TARGET_PCLMUL OPTION_ISA_PCLMUL +#define TARGET_CMPXCHG16B OPTION_ISA_CX16 + /* SSE5 and SSE4.1 define the same round instructions */ #define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5) @@ -163,6 +170,10 @@ struct processor_costs { }; extern const struct processor_costs *ix86_cost; +extern const struct processor_costs ix86_size_cost; + +#define ix86_cur_cost() \ + (optimize_insn_for_size_p () ? &ix86_size_cost: ix86_cost) /* Macros used in the machine description to test the flags. */ @@ -286,7 +297,7 @@ enum ix86_tune_indices { X86_TUNE_LAST }; -extern unsigned int ix86_tune_features[X86_TUNE_LAST]; +extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE] #define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY] @@ -380,7 +391,7 @@ enum ix86_arch_indices { X86_ARCH_LAST }; -extern unsigned int ix86_arch_features[X86_ARCH_LAST]; +extern unsigned char ix86_arch_features[X86_ARCH_LAST]; #define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE] #define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG] @@ -392,15 +403,7 @@ extern unsigned int ix86_arch_features[X86_ARCH_LAST]; extern int x86_prefetch_sse; -#define TARGET_ABM x86_abm -#define TARGET_CMPXCHG16B x86_cmpxchg16b -#define TARGET_POPCNT x86_popcnt #define TARGET_PREFETCH_SSE x86_prefetch_sse -#define TARGET_SAHF x86_sahf -#define TARGET_RECIP x86_recip -#define TARGET_FUSED_MADD x86_fused_muladd -#define TARGET_AES (TARGET_SSE2 && x86_aes) -#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul) #define ASSEMBLER_DIALECT (ix86_asm_dialect) @@ -475,7 +478,7 @@ enum calling_abi Don't use this macro to turn on various extra optimizations for `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ -#define OVERRIDE_OPTIONS override_options () +#define OVERRIDE_OPTIONS override_options (true) /* Define this to change the optimizations performed by default. */ #define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \ @@ -537,196 +540,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #endif /* Target CPU builtins. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - size_t arch_len = strlen (ix86_arch_string); \ - size_t tune_len = strlen (ix86_tune_string); \ - int last_arch_char = ix86_arch_string[arch_len - 1]; \ - int last_tune_char = ix86_tune_string[tune_len - 1]; \ - \ - if (TARGET_64BIT) \ - { \ - builtin_assert ("cpu=x86_64"); \ - builtin_assert ("machine=x86_64"); \ - builtin_define ("__amd64"); \ - builtin_define ("__amd64__"); \ - builtin_define ("__x86_64"); \ - builtin_define ("__x86_64__"); \ - } \ - else \ - { \ - builtin_assert ("cpu=i386"); \ - builtin_assert ("machine=i386"); \ - builtin_define_std ("i386"); \ - } \ - \ - /* Built-ins based on -march=. 
*/ \ - switch (ix86_arch) \ - { \ - case PROCESSOR_I386: \ - break; \ - case PROCESSOR_I486: \ - builtin_define ("__i486"); \ - builtin_define ("__i486__"); \ - break; \ - case PROCESSOR_PENTIUM: \ - builtin_define ("__i586"); \ - builtin_define ("__i586__"); \ - builtin_define ("__pentium"); \ - builtin_define ("__pentium__"); \ - if (last_arch_char == 'x') \ - builtin_define ("__pentium_mmx__"); \ - break; \ - case PROCESSOR_PENTIUMPRO: \ - builtin_define ("__i686"); \ - builtin_define ("__i686__"); \ - builtin_define ("__pentiumpro"); \ - builtin_define ("__pentiumpro__"); \ - break; \ - case PROCESSOR_GEODE: \ - builtin_define ("__geode"); \ - builtin_define ("__geode__"); \ - break; \ - case PROCESSOR_K6: \ - builtin_define ("__k6"); \ - builtin_define ("__k6__"); \ - if (last_arch_char == '2') \ - builtin_define ("__k6_2__"); \ - else if (last_arch_char == '3') \ - builtin_define ("__k6_3__"); \ - break; \ - case PROCESSOR_ATHLON: \ - builtin_define ("__athlon"); \ - builtin_define ("__athlon__"); \ - /* Only plain "athlon" lacks SSE. */ \ - if (last_arch_char != 'n') \ - builtin_define ("__athlon_sse__"); \ - break; \ - case PROCESSOR_K8: \ - builtin_define ("__k8"); \ - builtin_define ("__k8__"); \ - break; \ - case PROCESSOR_AMDFAM10: \ - builtin_define ("__amdfam10"); \ - builtin_define ("__amdfam10__"); \ - break; \ - case PROCESSOR_PENTIUM4: \ - builtin_define ("__pentium4"); \ - builtin_define ("__pentium4__"); \ - break; \ - case PROCESSOR_NOCONA: \ - builtin_define ("__nocona"); \ - builtin_define ("__nocona__"); \ - break; \ - case PROCESSOR_CORE2: \ - builtin_define ("__core2"); \ - builtin_define ("__core2__"); \ - break; \ - case PROCESSOR_GENERIC32: \ - case PROCESSOR_GENERIC64: \ - case PROCESSOR_max: \ - gcc_unreachable (); \ - } \ - \ - /* Built-ins based on -mtune=. */ \ - switch (ix86_tune) \ - { \ - case PROCESSOR_I386: \ - builtin_define ("__tune_i386__"); \ - break; \ - case PROCESSOR_I486: \ - builtin_define ("__tune_i486__"); \ - break; \ - case PROCESSOR_PENTIUM: \ - builtin_define ("__tune_i586__"); \ - builtin_define ("__tune_pentium__"); \ - if (last_tune_char == 'x') \ - builtin_define ("__tune_pentium_mmx__"); \ - break; \ - case PROCESSOR_PENTIUMPRO: \ - builtin_define ("__tune_i686__"); \ - builtin_define ("__tune_pentiumpro__"); \ - switch (last_tune_char) \ - { \ - case '3': \ - builtin_define ("__tune_pentium3__"); \ - /* FALLTHRU */ \ - case '2': \ - builtin_define ("__tune_pentium2__"); \ - break; \ - } \ - break; \ - case PROCESSOR_GEODE: \ - builtin_define ("__tune_geode__"); \ - break; \ - case PROCESSOR_K6: \ - builtin_define ("__tune_k6__"); \ - if (last_tune_char == '2') \ - builtin_define ("__tune_k6_2__"); \ - else if (last_tune_char == '3') \ - builtin_define ("__tune_k6_3__"); \ - break; \ - case PROCESSOR_ATHLON: \ - builtin_define ("__tune_athlon__"); \ - /* Only plain "athlon" lacks SSE. 
*/ \ - if (last_tune_char != 'n') \ - builtin_define ("__tune_athlon_sse__"); \ - break; \ - case PROCESSOR_K8: \ - builtin_define ("__tune_k8__"); \ - break; \ - case PROCESSOR_AMDFAM10: \ - builtin_define ("__tune_amdfam10__"); \ - break; \ - case PROCESSOR_PENTIUM4: \ - builtin_define ("__tune_pentium4__"); \ - break; \ - case PROCESSOR_NOCONA: \ - builtin_define ("__tune_nocona__"); \ - break; \ - case PROCESSOR_CORE2: \ - builtin_define ("__tune_core2__"); \ - break; \ - case PROCESSOR_GENERIC32: \ - case PROCESSOR_GENERIC64: \ - break; \ - case PROCESSOR_max: \ - gcc_unreachable (); \ - } \ - \ - if (TARGET_MMX) \ - builtin_define ("__MMX__"); \ - if (TARGET_3DNOW) \ - builtin_define ("__3dNOW__"); \ - if (TARGET_3DNOW_A) \ - builtin_define ("__3dNOW_A__"); \ - if (TARGET_SSE) \ - builtin_define ("__SSE__"); \ - if (TARGET_SSE2) \ - builtin_define ("__SSE2__"); \ - if (TARGET_SSE3) \ - builtin_define ("__SSE3__"); \ - if (TARGET_SSSE3) \ - builtin_define ("__SSSE3__"); \ - if (TARGET_SSE4_1) \ - builtin_define ("__SSE4_1__"); \ - if (TARGET_SSE4_2) \ - builtin_define ("__SSE4_2__"); \ - if (TARGET_AES) \ - builtin_define ("__AES__"); \ - if (TARGET_PCLMUL) \ - builtin_define ("__PCLMUL__"); \ - if (TARGET_SSE4A) \ - builtin_define ("__SSE4A__"); \ - if (TARGET_SSE5) \ - builtin_define ("__SSE5__"); \ - if (TARGET_SSE_MATH && TARGET_SSE) \ - builtin_define ("__SSE_MATH__"); \ - if (TARGET_SSE_MATH && TARGET_SSE2) \ - builtin_define ("__SSE2_MATH__"); \ - } \ - while (0) +#define TARGET_CPU_CPP_BUILTINS() ix86_target_macros () + +/* Target Pragmas. */ +#define REGISTER_TARGET_PRAGMAS() ix86_register_pragmas () enum target_cpu_default { @@ -835,16 +652,32 @@ enum target_cpu_default #define STACK_BOUNDARY (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 \ : BITS_PER_WORD) +/* Stack boundary of the main function guaranteed by OS. */ +#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) + +/* Minimum stack boundary. */ +#define MIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) + /* Boundary (in *bits*) on which the stack pointer prefers to be aligned; the compiler cannot rely on having this alignment. */ #define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary -/* As of July 2001, many runtimes do not align the stack properly when - entering main. This causes expand_main_function to forcibly align - the stack, which results in aligned frames for functions called from - main, though it does nothing for the alignment of main itself. */ -#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \ - (ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT) +/* It should be MIN_STACK_BOUNDARY. But we set it to 128 bits for + both 32bit and 64bit, to support codes that need 128 bit stack + alignment for SSE instructions, but can't realign the stack. */ +#define PREFERRED_STACK_BOUNDARY_DEFAULT 128 + +/* 1 if -mstackrealign should be turned on by default. It will + generate an alternate prologue and epilogue that realigns the + runtime stack if nessary. This supports mixing codes that keep a + 4-byte aligned stack, as specified by i386 psABI, with codes that + need a 16-byte aligned stack, as required by SSE instructions. If + STACK_REALIGN_DEFAULT is 1 and PREFERRED_STACK_BOUNDARY_DEFAULT is + 128, stacks for all functions may be realigned. */ +#define STACK_REALIGN_DEFAULT 0 + +/* Boundary (in *bits*) on which the incoming stack is aligned. */ +#define INCOMING_STACK_BOUNDARY ix86_incoming_stack_boundary /* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. 
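A quick reference for the boundary macros introduced above (values in bits, read off the definitions; no additional semantics implied):

/*                                     -m32   -m64
   MIN_STACK_BOUNDARY                    32    128
   MAIN_STACK_BOUNDARY                   32    128
   PREFERRED_STACK_BOUNDARY_DEFAULT     128    128
   INCOMING_STACK_BOUNDARY    runtime value: ix86_incoming_stack_boundary  */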
This is mandatory for the 64-bit ABI, and may or may not be true for other @@ -871,6 +704,9 @@ enum target_cpu_default #define BIGGEST_ALIGNMENT 128 +/* Maximum stack alignment. */ +#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT + /* Decide whether a variable of mode MODE should be 128 bit aligned. */ #define ALIGN_MODE_128(MODE) \ ((MODE) == XFmode || SSE_REG_MODE_P (MODE)) @@ -1297,7 +1133,7 @@ do { \ the pic register when possible. The change is visible after the prologue has been emitted. */ -#define REAL_PIC_OFFSET_TABLE_REGNUM 3 +#define REAL_PIC_OFFSET_TABLE_REGNUM BX_REG #define PIC_OFFSET_TABLE_REGNUM \ ((TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC) \ @@ -1816,12 +1652,9 @@ typedef struct ix86_args { { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \ /* Given FROM and TO register numbers, say whether this elimination is - allowed. Frame pointer elimination is automatically handled. - - All other eliminations are valid. */ + allowed. */ -#define CAN_ELIMINATE(FROM, TO) \ - ((TO) == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1) +#define CAN_ELIMINATE(FROM, TO) ix86_can_eliminate ((FROM), (TO)) /* Define the offset between two registers, one to be eliminated, and the other its replacement, at the start of a routine. */ @@ -2390,6 +2223,7 @@ enum asm_dialect { extern enum asm_dialect ix86_asm_dialect; extern unsigned int ix86_preferred_stack_boundary; +extern unsigned int ix86_incoming_stack_boundary; extern int ix86_branch_cost, ix86_section_threshold; /* Smallest class containing REGNO. */ @@ -2491,7 +2325,6 @@ struct machine_function GTY(()) { struct stack_local_entry *stack_locals; const char *some_ld_name; - rtx force_align_arg_pointer; int save_varrargs_registers; int accesses_prev_frame; int optimize_mode_switching[MAX_386_ENTITIES]; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 45e5595fab2..f8d4c7db118 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -245,6 +245,7 @@ [(AX_REG 0) (DX_REG 1) (CX_REG 2) + (BX_REG 3) (SI_REG 4) (DI_REG 5) (BP_REG 6) @@ -254,6 +255,7 @@ (FPCR_REG 19) (R10_REG 39) (R11_REG 40) + (R13_REG 42) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -1417,7 +1419,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (match_operand:SI 1 "const0_operand" "")) (clobber (reg:CC FLAGS_REG))] - "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "reload_completed" "xor{l}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "SI") @@ -1428,8 +1430,7 @@ (match_operand:SI 1 "immediate_operand" "i")) (clobber (reg:CC FLAGS_REG))] "reload_completed - && operands[1] == constm1_rtx - && (TARGET_MOVE_M1_VIA_OR || optimize_size)" + && operands[1] == constm1_rtx" { operands[1] = constm1_rtx; return "or{l}\t{%1, %0|%0, %1}"; @@ -1708,8 +1709,7 @@ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) (match_operand:HI 1 "const0_operand" "")) (clobber (reg:CC FLAGS_REG))] - "reload_completed - && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)" + "reload_completed" "xor{w}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "HI") @@ -1865,7 +1865,7 @@ [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) (match_operand:QI 1 "const0_operand" "")) (clobber (reg:CC FLAGS_REG))] - "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "reload_completed" "xor{b}\t%0, %0" [(set_attr "type" "alu1") (set_attr "mode" "QI") @@ -2191,7 +2191,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (match_operand:DI 1 "const0_operand" "")) (clobber 
(reg:CC FLAGS_REG))] - "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size) + "TARGET_64BIT && reload_completed" "xor{l}\t%k0, %k0"; [(set_attr "type" "alu1") @@ -2202,7 +2202,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (match_operand:DI 1 "const_int_operand" "i")) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && (TARGET_MOVE_M1_VIA_OR || optimize_size) + "TARGET_64BIT && reload_completed && operands[1] == constm1_rtx" { @@ -4500,13 +4500,16 @@ (use (match_dup 2)) (clobber (match_scratch:<ssevecmode> 3 "")) (clobber (match_scratch:<ssevecmode> 4 ""))])] - "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH && !optimize_size" + "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" { enum machine_mode mode = <MODE>mode; enum machine_mode vecmode = <ssevecmode>mode; REAL_VALUE_TYPE TWO31r; rtx two31; + if (optimize_insn_for_size_p ()) + FAIL; + real_ldexp (&TWO31r, &dconst1, 31); two31 = const_double_from_real_value (TWO31r, mode); two31 = ix86_build_const_vector (mode, true, two31); @@ -8213,7 +8216,7 @@ (match_operand:SF 2 "nonimmediate_operand" "")))] "TARGET_80387 || TARGET_SSE_MATH" { - if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size + if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { @@ -10917,7 +10920,7 @@ (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "vector")]) -(define_expand "x86_64_shift_adj" +(define_expand "x86_64_shift_adj_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") (const_int 64)) @@ -10933,6 +10936,34 @@ "TARGET_64BIT" "") +(define_expand "x86_64_shift_adj_2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "TARGET_64BIT" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + (define_expand "ashldi3" [(set (match_operand:DI 0 "shiftdi_operand" "") (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "") @@ -11880,7 +11911,35 @@ "" "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;") -(define_insn "*ashrdi3_63_rex64" +(define_expand "x86_64_shift_adj_3" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_insn "ashrdi3_63_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") (ashiftrt:DI (match_operand:DI 1 
"nonimmediate_operand" "*a,0") (match_operand:DI 2 "const_int_operand" "i,i"))) @@ -16853,10 +16912,13 @@ UNSPEC_FPATAN)) (clobber (match_scratch:XF 6 ""))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { int i; + if (optimize_insn_for_size_p ()) + FAIL; + for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); @@ -16869,11 +16931,14 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_asinxf2 (op0, op1)); emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); @@ -16891,10 +16956,13 @@ UNSPEC_FPATAN)) (clobber (match_scratch:XF 6 ""))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { int i; + if (optimize_insn_for_size_p ()) + FAIL; + for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); @@ -16907,11 +16975,14 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_acosxf2 (op0, op1)); emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); @@ -17066,8 +17137,11 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; + ix86_emit_i387_log1p (operands[0], operands[1]); DONE; }) @@ -17078,9 +17152,14 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); + rtx op0; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]); @@ -17148,10 +17227,15 @@ [(use (match_operand:SI 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1])); emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); @@ -17164,10 +17248,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); @@ -17212,10 +17301,13 @@ (unspec:XF 
[(match_dup 8) (match_dup 4)] UNSPEC_FSCALE_EXP))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { int i; + if (optimize_insn_for_size_p ()) + FAIL; + for (i = 3; i < 10; i++) operands[i] = gen_reg_rtx (XFmode); @@ -17226,9 +17318,14 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op2 = gen_reg_rtx (XFmode); + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); @@ -17241,10 +17338,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_expxf2 (op0, op1)); @@ -17256,9 +17358,14 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op2 = gen_reg_rtx (XFmode); + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); @@ -17271,10 +17378,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_exp10xf2 (op0, op1)); @@ -17286,9 +17398,14 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op2 = gen_reg_rtx (XFmode); + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); @@ -17301,10 +17418,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_exp2xf2 (op0, op1)); @@ -17336,10 +17458,13 @@ (set (match_operand:XF 0 "register_operand" "") (plus:XF (match_dup 12) (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { int i; + if 
(optimize_insn_for_size_p ()) + FAIL; + for (i = 2; i < 13; i++) operands[i] = gen_reg_rtx (XFmode); @@ -17355,10 +17480,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_expm1xf2 (op0, op1)); @@ -17377,8 +17507,11 @@ (unspec:XF [(match_dup 1) (match_dup 3)] UNSPEC_FSCALE_EXP))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; + operands[3] = gen_reg_rtx (XFmode); operands[4] = gen_reg_rtx (XFmode); }) @@ -17390,10 +17523,15 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_ldexpxf3 (op0, op1, operands[2])); @@ -17410,8 +17548,11 @@ (unspec:XF [(match_dup 1) (match_dup 2)] UNSPEC_FSCALE_EXP))])] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; + operands[3] = gen_reg_rtx (XFmode); }) @@ -17422,11 +17563,16 @@ "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); - rtx op2 = gen_reg_rtx (XFmode); + rtx op0, op1, op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + op2 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_extend<mode>xf2 (op2, operands[2])); @@ -17465,13 +17611,13 @@ || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || !optimize_size))" + && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || !optimize_size)) + && !flag_trapping_math) { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x04))); @@ -17495,9 +17641,10 @@ [(match_operand:MODEF 0 "register_operand" "") (match_operand:MODEF 1 "nonimmediate_operand" "")] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math && !flag_rounding_math - && !optimize_size" + && !flag_trapping_math && !flag_rounding_math" { + if (optimize_insn_for_size_p ()) + FAIL; if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_round (operand0, operand1); else @@ -17648,9 +17795,10 @@ (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT) - && !flag_trapping_math && !flag_rounding_math - && !optimize_size" 
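This hunk shows the conversion applied throughout the file: the blanket !optimize_size test leaves the expander condition, and the expander instead FAILs when optimize_insn_for_size_p () holds, so the size/speed choice is made per insn from hotness information rather than once per compilation. A hedged source-level sketch of what the finer granularity buys, assuming the hot/cold function attributes and flags that permit the inline expansion (e.g. -ffast-math):

/* With function-level hotness, one object file can mix size- and
   speed-optimized code, which the old global optimize_size flag could
   not express.  Under the new predicate the expander FAILs in the
   cold function (falling back to a compact libcall) but can emit the
   inline SSE sequence in the hot one.  */
__attribute__((cold)) double
error_path (double x)
{
  return __builtin_floor (x);    /* expanded for size  */
}

__attribute__((hot)) double
steady_state (double x)
{
  return __builtin_floor (x);    /* expanded for speed */
}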
+ && !flag_trapping_math && !flag_rounding_math" { + if (optimize_insn_for_size_p ()) + FAIL; ix86_expand_lround (operand0, operand1); DONE; }) @@ -17698,8 +17846,10 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; emit_insn (gen_frndintxf2_floor (operands[0], operands[1])); DONE; }) @@ -17710,15 +17860,16 @@ "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size) + && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || !optimize_size))" + && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math - && (TARGET_ROUND || !optimize_size)) + && (TARGET_ROUND || optimize_insn_for_speed_p ())) { + if (!TARGET_ROUND && optimize_insn_for_size_p ()) + FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x01))); @@ -17729,9 +17880,13 @@ } else { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_frndintxf2_floor (op0, op1)); @@ -17902,9 +18057,10 @@ [(match_operand:DI 0 "nonimmediate_operand" "") (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT - && !flag_trapping_math - && !optimize_size" + && !flag_trapping_math" { + if (optimize_insn_for_size_p ()) + FAIL; ix86_expand_lfloorceil (operand0, operand1, true); DONE; }) @@ -17913,9 +18069,10 @@ [(match_operand:SI 0 "nonimmediate_operand" "") (match_operand:MODEF 1 "register_operand" "")] "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (!optimize_size || !TARGET_64BIT)" + && !flag_trapping_math" { + if (optimize_insn_for_size_p () && TARGET_64BIT) + FAIL; ix86_expand_lfloorceil (operand0, operand1, true); DONE; }) @@ -17963,8 +18120,10 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; emit_insn (gen_frndintxf2_ceil (operands[0], operands[1])); DONE; }) @@ -17975,18 +18134,19 @@ "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size) + && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || !optimize_size))" + && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math - && (TARGET_ROUND || !optimize_size)) + && (TARGET_ROUND || optimize_insn_for_speed_p ())) { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x02))); + else if (optimize_insn_for_size_p ()) + FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_floorceil (operand0, operand1, false); else @@ -17994,9 +18154,13 @@ } else { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + + if 
(optimize_insn_for_size_p ()) + FAIL; + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_frndintxf2_ceil (op0, op1)); @@ -18226,8 +18390,10 @@ [(use (match_operand:XF 0 "register_operand" "")) (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations && !optimize_size" + && flag_unsafe_math_optimizations" { + if (optimize_insn_for_size_p ()) + FAIL; emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); DONE; }) @@ -18238,18 +18404,19 @@ "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations && !optimize_size) + && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || !optimize_size))" + && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math - && (TARGET_ROUND || !optimize_size)) + && (TARGET_ROUND || optimize_insn_for_speed_p ())) { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x03))); + else if (optimize_insn_for_size_p ()) + FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_trunc (operand0, operand1); else @@ -18257,9 +18424,13 @@ } else { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + rtx op0, op1; + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); emit_insn (gen_extend<mode>xf2 (op1, operands[1])); emit_insn (gen_frndintxf2_trunc (op0, op1)); @@ -18450,7 +18621,7 @@ operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); /* Can't use this if the user has appropriated esi or edi. 
*/ - if ((TARGET_SINGLE_STRINGOP || optimize_size) + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) { emit_insn (gen_strmov_singleop (operands[0], operands[1], @@ -18469,7 +18640,7 @@ (match_operand 4 "" "")) (set (match_operand 2 "register_operand" "") (match_operand 5 "" ""))])] - "TARGET_SINGLE_STRINGOP || optimize_size" + "" "ix86_current_function_needs_cld = 1;") (define_insn "*strmovdi_rex_1" @@ -18481,7 +18652,7 @@ (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) (const_int 8)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "movsq" [(set_attr "type" "str") (set_attr "mode" "DI") @@ -18496,7 +18667,7 @@ (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) (const_int 4)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") @@ -18511,7 +18682,7 @@ (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) (const_int 4)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "movs{l|d}" [(set_attr "type" "str") (set_attr "mode" "SI") @@ -18526,7 +18697,7 @@ (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) (const_int 2)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") @@ -18541,7 +18712,7 @@ (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) (const_int 2)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") @@ -18556,7 +18727,7 @@ (set (match_operand:SI 1 "register_operand" "=S") (plus:SI (match_dup 3) (const_int 1)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "movsb" [(set_attr "type" "str") (set_attr "memory" "both") @@ -18571,7 +18742,7 @@ (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) (const_int 1)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "movsb" [(set_attr "type" "str") (set_attr "memory" "both") @@ -18733,7 +18904,7 @@ operands[3] = gen_rtx_PLUS (Pmode, operands[0], GEN_INT (GET_MODE_SIZE (GET_MODE (operands[2])))); - if (TARGET_SINGLE_STRINGOP || optimize_size) + if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) { emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], operands[3])); @@ -18746,7 +18917,7 @@ (match_operand 2 "register_operand" "")) (set (match_operand 0 "register_operand" "") (match_operand 3 "" ""))])] - "TARGET_SINGLE_STRINGOP || optimize_size" + "" "ix86_current_function_needs_cld = 1;") (define_insn "*strsetdi_rex_1" @@ -18755,7 +18926,7 @@ (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) (const_int 8)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18767,7 +18938,7 @@ (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) (const_int 4)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18779,7 +18950,7 @@ (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) (const_int 4)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "stos{l|d}" [(set_attr "type" 
"str") (set_attr "memory" "store") @@ -18791,7 +18962,7 @@ (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) (const_int 2)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18803,7 +18974,7 @@ (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) (const_int 2)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18815,7 +18986,7 @@ (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_dup 1) (const_int 1)))] - "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "!TARGET_64BIT" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18827,7 +18998,7 @@ (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) (const_int 1)))] - "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "TARGET_64BIT" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -18932,10 +19103,13 @@ (match_operand:BLK 2 "general_operand" ""))) (use (match_operand 3 "general_operand" "")) (use (match_operand 4 "immediate_operand" ""))] - "! optimize_size || TARGET_INLINE_ALL_STRINGOPS" + "" { rtx addr1, addr2, out, outlow, count, countreg, align; + if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) + FAIL; + /* Can't use this if the user has appropriated esi or edi. */ if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) FAIL; @@ -19856,7 +20030,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && reload_completed && ! TARGET_FAST_PREFIX - && ! optimize_size + && optimize_insn_for_speed_p () /* Ensure that the operand will remain sign-extended immediate. */ && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)" [(set (match_dup 0) @@ -19876,7 +20050,8 @@ "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(parallel [(set (match_dup 0) (neg:SI (match_dup 1))) (clobber (reg:CC FLAGS_REG))])] @@ -19889,7 +20064,8 @@ "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(set (match_dup 0) (not:SI (match_dup 1)))] "operands[0] = gen_lowpart (SImode, operands[0]); @@ -19904,7 +20080,8 @@ "! 
TARGET_PARTIAL_REG_STALL && TARGET_CMOVE && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + && (TARGET_PROMOTE_QImode + || optimize_insn_for_size_p ())))" [(set (match_dup 0) (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] "operands[0] = gen_lowpart (SImode, operands[0]); @@ -19920,7 +20097,7 @@ [(set (match_operand:SI 0 "push_operand" "") (match_operand:SI 1 "memory_operand" "")) (match_scratch:SI 2 "r")] - "!optimize_size && !TARGET_PUSH_MEMORY + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] @@ -19930,7 +20107,7 @@ [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "memory_operand" "")) (match_scratch:DI 2 "r")] - "!optimize_size && !TARGET_PUSH_MEMORY + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] @@ -19942,7 +20119,7 @@ [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" "")) (match_scratch:SF 2 "r")] - "!optimize_size && !TARGET_PUSH_MEMORY + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] @@ -19952,7 +20129,7 @@ [(set (match_operand:HI 0 "push_operand" "") (match_operand:HI 1 "memory_operand" "")) (match_scratch:HI 2 "r")] - "!optimize_size && !TARGET_PUSH_MEMORY + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] @@ -19962,7 +20139,7 @@ [(set (match_operand:QI 0 "push_operand" "") (match_operand:QI 1 "memory_operand" "")) (match_scratch:QI 2 "q")] - "!optimize_size && !TARGET_PUSH_MEMORY + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] @@ -19974,10 +20151,10 @@ [(match_scratch:SI 1 "r") (set (match_operand:SI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 1) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) @@ -19988,10 +20165,10 @@ [(match_scratch:HI 1 "r") (set (match_operand:HI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) @@ -20002,10 +20179,10 @@ [(match_scratch:QI 1 "q") (set (match_operand:QI 0 "memory_operand" "") (const_int 0))] - "! optimize_size + "optimize_insn_for_speed_p () && ! 
TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) @@ -20016,9 +20193,9 @@ [(match_scratch:SI 2 "r") (set (match_operand:SI 0 "memory_operand" "") (match_operand:SI 1 "immediate_operand" ""))] - "! optimize_size + "optimize_insn_for_speed_p () && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn" + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -20027,9 +20204,9 @@ [(match_scratch:HI 2 "r") (set (match_operand:HI 0 "memory_operand" "") (match_operand:HI 1 "immediate_operand" ""))] - "! optimize_size + "optimize_insn_for_speed_p () && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn" + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -20038,9 +20215,9 @@ [(match_scratch:QI 2 "q") (set (match_operand:QI 0 "memory_operand" "") (match_operand:QI 1 "immediate_operand" ""))] - "! optimize_size + "optimize_insn_for_speed_p () && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cost->large_insn" + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] "") @@ -20052,7 +20229,7 @@ [(match_operand:SI 2 "memory_operand" "") (const_int 0)])) (match_scratch:SI 3 "r")] - " ! optimize_size && ix86_match_ccmode (insn, CCNOmode)" + "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" [(set (match_dup 3) (match_dup 2)) (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] "") @@ -20071,7 +20248,7 @@ (define_peephole2 [(set (match_operand:SI 0 "nonimmediate_operand" "") (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] - "!optimize_size + "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], SImode))) @@ -20085,7 +20262,7 @@ (define_peephole2 [(set (match_operand:HI 0 "nonimmediate_operand" "") (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] - "!optimize_size + "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], HImode))) @@ -20099,7 +20276,7 @@ (define_peephole2 [(set (match_operand:QI 0 "nonimmediate_operand" "") (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] - "!optimize_size + "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], QImode))) @@ -20198,7 +20375,7 @@ [(match_dup 0) (match_operand:SI 1 "memory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "! optimize_size && ! TARGET_READ_MODIFY" + "optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 2)])) @@ -20212,7 +20389,7 @@ [(match_operand:SI 1 "memory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "! optimize_size && ! TARGET_READ_MODIFY" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 2) (match_dup 0)])) @@ -20232,7 +20409,7 @@ [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "! optimize_size && ! 
TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) @@ -20247,7 +20424,7 @@ [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "! optimize_size && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) @@ -20260,7 +20437,7 @@ [(set (match_operand 0 "register_operand" "") (match_operand 1 "const0_operand" ""))] "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD - && (! TARGET_USE_MOV0 || optimize_size) + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) && GENERAL_REG_P (operands[0]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) @@ -20274,7 +20451,7 @@ (const_int 0))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) - && (! TARGET_USE_MOV0 || optimize_size) + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) (clobber (reg:CC FLAGS_REG))])]) @@ -20286,7 +20463,7 @@ "(GET_MODE (operands[0]) == HImode || GET_MODE (operands[0]) == SImode || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (optimize_size || TARGET_MOVE_M1_VIA_OR) + && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC FLAGS_REG))])] @@ -20377,7 +20554,7 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) @@ -20387,7 +20564,7 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) @@ -20398,7 +20575,7 @@ [(match_scratch:SI 0 "r") (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) @@ -20406,7 +20583,7 @@ [(match_scratch:SI 0 "r") (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0)) (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))]) @@ -20417,7 +20594,7 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_4" + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK 
(scratch)))])] @@ -20431,7 +20608,7 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_8" + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))]) @@ -20444,7 +20621,7 @@ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size" + "optimize_insn_for_size_p ()" [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) (clobber (mem:BLK (scratch)))]) @@ -20480,7 +20657,7 @@ [(match_scratch:SI 0 "r") (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size" + "optimize_insn_for_size_p ()" [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) @@ -20509,7 +20686,7 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) (clobber (mem:BLK (scratch)))])]) @@ -20519,7 +20696,7 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) @@ -20530,7 +20707,7 @@ [(match_scratch:DI 0 "r") (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size || !TARGET_SUB_ESP_4" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" [(clobber (match_dup 0)) (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) @@ -20538,7 +20715,7 @@ [(match_scratch:DI 0 "r") (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size || !TARGET_SUB_ESP_8" + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" [(clobber (match_dup 0)) (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) @@ -20549,7 +20726,7 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_4" + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))])] @@ -20563,7 +20740,7 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size || !TARGET_ADD_ESP_8" + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))]) @@ -20576,7 +20753,7 @@ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI 
SP_REG) (const_int 16))) (clobber (reg:CC FLAGS_REG)) (clobber (mem:BLK (scratch)))])] - "optimize_size" + "optimize_insn_for_size_p ()" [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) (clobber (mem:BLK (scratch)))]) @@ -20612,7 +20789,7 @@ [(match_scratch:DI 0 "r") (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) (clobber (reg:CC FLAGS_REG))])] - "optimize_size" + "optimize_insn_for_size_p ()" [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) @@ -20640,7 +20817,7 @@ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") (match_operand:SI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "!optimize_size + "optimize_insn_for_speed_p () && (INTVAL (operands[2]) == 3 || INTVAL (operands[2]) == 5 || INTVAL (operands[2]) == 9)" @@ -20672,7 +20849,7 @@ (match_operand:DI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT - && !optimize_size + && optimize_insn_for_speed_p () && (INTVAL (operands[2]) == 3 || INTVAL (operands[2]) == 5 || INTVAL (operands[2]) == 9)" @@ -20690,7 +20867,7 @@ (mult:DI (match_operand:DI 1 "memory_operand" "") (match_operand:DI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) @@ -20703,7 +20880,7 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) @@ -20717,7 +20894,7 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) @@ -20734,7 +20911,7 @@ (match_operand:DI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:DI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && !optimize_size + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) @@ -20750,7 +20927,7 @@ (match_operand:SI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:SI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && !optimize_size + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) @@ -20766,7 +20943,7 @@ (match_operand:HI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:HI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && !optimize_size" + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) 
(clobber (reg:CC FLAGS_REG))])] diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 75c94ba771e..fc59b77ac7b 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -18,24 +18,58 @@ ; along with GCC; see the file COPYING3. If not see ; <http://www.gnu.org/licenses/>. +;; Definitions to add to the cl_target_option structure +;; -march= processor +TargetSave +unsigned char arch + +;; -mtune= processor +TargetSave +unsigned char tune + +;; -mfpmath= +TargetSave +unsigned char fpmath + +;; branch cost +TargetSave +unsigned char branch_cost + +;; which ISA flags were passed by the user +TargetSave +int ix86_isa_flags_explicit + +;; which target flags were passed by the user +TargetSave +int target_flags_explicit + +;; whether -mtune was defaulted (not specified) +TargetSave +unsigned char tune_defaulted + +;; whether -march was specified +TargetSave +unsigned char arch_specified + +;; x86 options m128bit-long-double -Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) +Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save sizeof(long double) is 16 m80387 -Target Report Mask(80387) +Target Report Mask(80387) Save Use hardware fp m96bit-long-double -Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) +Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) Save sizeof(long double) is 12 maccumulate-outgoing-args -Target Report Mask(ACCUMULATE_OUTGOING_ARGS) +Target Report Mask(ACCUMULATE_OUTGOING_ARGS) Save Reserve space for outgoing arguments in the function prologue malign-double -Target Report Mask(ALIGN_DOUBLE) +Target Report Mask(ALIGN_DOUBLE) Save Align some doubles on dword boundary malign-functions= @@ -51,7 +85,7 @@ Target RejectNegative Joined Var(ix86_align_loops_string) Loop code aligned to this power of 2 malign-stringops -Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) +Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) Save Align destination of the string operations march= @@ -75,11 +109,15 @@ Target RejectNegative Joined Var(ix86_cmodel_string) Use given x86-64 code model mfancy-math-387 -Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) +Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU +mforce-drap +Target Report Var(ix86_force_drap) +Always use Dynamic Realigned Argument Pointer (DRAP) to realign the stack + mfp-ret-in-387 -Target Report Mask(FLOAT_RETURNS) +Target Report Mask(FLOAT_RETURNS) Save Return values of functions in FPU registers mfpmath= @@ -87,19 +125,19 @@ Target RejectNegative Joined Var(ix86_fpmath_string) Generate floating point mathematics using given instruction set mhard-float -Target RejectNegative Mask(80387) MaskExists +Target RejectNegative Mask(80387) MaskExists Save Use hardware fp mieee-fp -Target Report Mask(IEEE_FP) +Target Report Mask(IEEE_FP) Save Use IEEE math for fp comparisons minline-all-stringops -Target Report Mask(INLINE_ALL_STRINGOPS) +Target Report Mask(INLINE_ALL_STRINGOPS) Save Inline all known string operations minline-stringops-dynamically -Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) +Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) Save Inline memset/memcpy string operations, but perform inline version only for small blocks mintel-syntax @@ -107,23 +145,23 @@ Target Undocumented ;; Deprecated mms-bitfields -Target Report Mask(MS_BITFIELD_LAYOUT) +Target Report Mask(MS_BITFIELD_LAYOUT) Save Use native (MS) bitfield layout mno-align-stringops -Target
RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented +Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented Save mno-fancy-math-387 -Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented +Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented Save mno-push-args -Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented +Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented Save mno-red-zone -Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented +Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented Save momit-leaf-frame-pointer -Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Target Report Mask(OMIT_LEAF_FRAME_POINTER) Save Omit the frame pointer in leaf functions mpc @@ -134,12 +172,16 @@ mpreferred-stack-boundary= Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string) Attempt to keep stack aligned to this power of 2 +mincoming-stack-boundary= +Target RejectNegative Joined Var(ix86_incoming_stack_boundary_string) +Assume incoming stack aligned to this power of 2 + mpush-args -Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) +Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) Save Use push instructions to save outgoing arguments mred-zone -Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) +Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) Save Use red-zone in the x86-64 code mregparm= @@ -147,23 +189,23 @@ Target RejectNegative Joined Var(ix86_regparm_string) Number of registers used to pass integer arguments mrtd -Target Report Mask(RTD) +Target Report Mask(RTD) Save Alternate calling convention msoft-float -Target InverseMask(80387) +Target InverseMask(80387) Save Do not use hardware fp msseregparm -Target RejectNegative Mask(SSEREGPARM) +Target RejectNegative Mask(SSEREGPARM) Save Use SSE register passing conventions for SF and DF mode mstackrealign -Target Report Var(ix86_force_align_arg_pointer) +Target Report Var(ix86_force_align_arg_pointer) Init(-1) Realign stack in prologue mstack-arg-probe -Target Report Mask(STACK_PROBE) +Target Report Mask(STACK_PROBE) Save Enable stack probing mstringop-strategy= @@ -186,104 +228,105 @@ mveclibabi= Target RejectNegative Joined Var(ix86_veclibabi_string) Vector library ABI to use +mrecip +Target Report Mask(RECIP) Save +Generate reciprocals instead of divss and sqrtss. + +mcld +Target Report Mask(CLD) Save +Generate cld instruction in the function prologue. + +mno-fused-madd +Target RejectNegative Report Mask(NO_FUSED_MADD) Undocumented Save + +mfused-madd +Target Report InverseMask(NO_FUSED_MADD, FUSED_MADD) Save +Enable automatic generation of fused floating point multiply-add instructions +if the ISA supports such instructions. The -mfused-madd option is on by +default. + ;; ISA support m32 -Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) VarExists +Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save Generate 32bit i386 code m64 -Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) VarExists +Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save Generate 64bit x86-64 code mmmx -Target Report Mask(ISA_MMX) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_MMX) Var(ix86_isa_flags) VarExists Save Support MMX built-in functions m3dnow -Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) VarExists Save Support 3DNow! 
built-in functions m3dnowa -Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) VarExists +Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) VarExists Save Support Athlon 3Dnow! built-in functions msse -Target Report Mask(ISA_SSE) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE) Var(ix86_isa_flags) VarExists Save Support MMX and SSE built-in functions and code generation msse2 -Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) VarExists Save Support MMX, SSE and SSE2 built-in functions and code generation msse3 -Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation mssse3 -Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation msse4.1 -Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation msse4.2 -Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation msse4 -Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists +Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation mno-sse4 -Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists +Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists Save Do not support SSE4.1 and SSE4.2 built-in functions and code generation msse4a -Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation msse5 -Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists +Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists Save Support SSE5 built-in functions and code generation -;; Instruction support - -mcld -Target Report Mask(CLD) -Generate cld instruction in the function prologue. - mabm -Target Report RejectNegative Var(x86_abm) +Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Support code generation of Advanced Bit Manipulation (ABM) instructions. -mcx16 -Target Report RejectNegative Var(x86_cmpxchg16b) -Support code generation of cmpxchg16b instruction. - mpopcnt -Target Report RejectNegative Var(x86_popcnt) +Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) VarExists Save Support code generation of popcnt instruction. +mcx16 +Target Report Mask(ISA_CX16) Var(ix86_isa_flags) VarExists Save +Support code generation of cmpxchg16b instruction. + msahf -Target Report RejectNegative Var(x86_sahf) +Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save Support code generation of sahf instruction in 64bit x86-64 code. -mrecip -Target Report RejectNegative Var(x86_recip) -Generate reciprocals instead of divss and sqrtss. 
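For the -mrecip behavior described here ("Generate reciprocals instead of divss and sqrtss"), the following is a hand-written model of the substitution, offered as a sketch rather than the compiler's literal output:

/* rcpss yields an ~11-bit estimate of 1/b; one Newton-Raphson step
   x = x*(2 - b*x) restores close to full single precision, which is
   why -mrecip is only sound under unsafe-math-style flags.  */
#include <xmmintrin.h>

float
fast_recip (float b)
{
  __m128 vb  = _mm_set_ss (b);
  __m128 x   = _mm_rcp_ss (vb);                 /* estimate */
  __m128 two = _mm_set_ss (2.0f);
  x = _mm_mul_ss (x, _mm_sub_ss (two, _mm_mul_ss (vb, x)));
  return _mm_cvtss_f32 (x);                     /* refined  */
}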
- -mfused-madd -Target Report Var(x86_fused_muladd) Init(1) -Enable automatic generation of fused floating point multiply-add instructions -if the ISA supports such instructions. The -mfused-madd option is on by -default. - maes -Target Report RejectNegative Var(x86_aes) +Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save Support AES built-in functions and code generation mpclmul -Target Report RejectNegative Var(x86_pclmul) +Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) VarExists Save Support PCLMUL built-in functions and code generation diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 0a507e07a2f..8e77a30d353 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -65,9 +65,9 @@ (define_insn "*mov<mode>_internal_rex64" [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" - "=rm,r,!?y,!?y ,m ,!y,Y2,x,x ,m,r,x") + "=rm,r,!?y,!?y ,m ,!y,*Y2,x,x ,m,r,Yi") (match_operand:MMXMODEI8 1 "vector_move_operand" - "Cr ,m,C ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))] + "Cr ,m,C ,!?ym,!?y,*Y2,!y,C,xm,x,Yi,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -124,9 +124,9 @@ (define_insn "*movv2sf_internal_rex64" [(set (match_operand:V2SF 0 "nonimmediate_operand" - "=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x") + "=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi") (match_operand:V2SF 1 "vector_move_operand" - "Cr ,m ,C ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))] + "Cr ,m ,C ,!?ym,!y,*Y2,!y,C,x,m,x,Yi,r"))] "TARGET_64BIT && TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c1d306054ad..c8bf42d8193 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -36,6 +36,10 @@ (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) (define_mode_iterator SSEMODEF2P [V4SF V2DF]) +;; Int-float size matches +(define_mode_iterator SSEMODE4S [V4SF V4SI]) +(define_mode_iterator SSEMODE2D [V2DF V2DI]) + ;; Mapping from float mode to required SSE level (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) @@ -57,6 +61,10 @@ (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")]) +;; Mapping of vector modes to a vector mode of double size +(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI") + (V4SF "V8SF") (V4SI "V8SI")]) + ;; Number of scalar elements in each vector type (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") @@ -202,6 +210,18 @@ DONE; }) +(define_insn "sse2_movq128" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_concat:V2DI + (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (const_int 0)))] + "TARGET_SSE2" + "movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + (define_insn "<sse>_movup<ssemodesuffixf2c>" [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m") (unspec:SSEMODEF2P @@ -2129,7 +2149,7 @@ "TARGET_SSE" { int mask = INTVAL (operands[3]); - emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2], + emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2], GEN_INT ((mask >> 0) & 3), GEN_INT ((mask >> 2) & 3), GEN_INT (((mask >> 4) & 3) + 4), @@ -2137,12 +2157,12 @@ DONE; }) -(define_insn "sse_shufps_1" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_select:V4SF - (vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) +(define_insn "sse_shufps_<mode>" + [(set (match_operand:SSEMODE4S 0 "register_operand" "=x") + 
@@ -2137,12 +2157,12 @@
   DONE;
 })
 
-(define_insn "sse_shufps_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (match_operand:V4SF 1 "register_operand" "0")
-	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "sse_shufps_<mode>"
+  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+	(vec_select:SSEMODE4S
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE4S 1 "register_operand" "0")
+	    (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
 	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")
@@ -2540,18 +2560,62 @@
   "TARGET_SSE2"
 {
   int mask = INTVAL (operands[3]);
-  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
				GEN_INT (mask & 1),
				GEN_INT (mask & 2 ? 3 : 2)));
   DONE;
 })
 
-(define_insn "sse2_shufpd_1"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-	(vec_select:V2DF
-	  (vec_concat:V4DF
-	    (match_operand:V2DF 1 "register_operand" "0")
-	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+(define_expand "vec_extract_even<mode>"
+  [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+	(vec_select:SSEMODE4S
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE4S 1 "register_operand" "")
+	    (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 0)
+		     (const_int 2)
+		     (const_int 4)
+		     (const_int 6)])))]
+  "TARGET_SSE")
+
+(define_expand "vec_extract_odd<mode>"
+  [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+	(vec_select:SSEMODE4S
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE4S 1 "register_operand" "")
+	    (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 1)
+		     (const_int 3)
+		     (const_int 5)
+		     (const_int 7)])))]
+  "TARGET_SSE")
+
+(define_expand "vec_extract_even<mode>"
+  [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+	(vec_select:SSEMODE2D
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE2D 1 "register_operand" "")
+	    (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 0)
+		     (const_int 2)])))]
+  "TARGET_SSE2")
+
+(define_expand "vec_extract_odd<mode>"
+  [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+	(vec_select:SSEMODE2D
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE2D 1 "register_operand" "")
+	    (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 1)
+		     (const_int 3)])))]
+  "TARGET_SSE2")
+
+(define_insn "sse2_shufpd_<mode>"
+  [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+	(vec_select:SSEMODE2D
+	  (vec_concat:<ssedoublesizemode>
+	    (match_operand:SSEMODE2D 1 "register_operand" "0")
+	    (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
 	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
		     (match_operand 4 "const_2_to_3_operand" "")])))]
   "TARGET_SSE2"
@@ -4195,6 +4259,46 @@
   DONE;
 })
 
+(define_expand "vec_interleave_highv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "register_operand" "")
+	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
+  "TARGET_SSE")
+
+(define_expand "vec_interleave_lowv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (match_operand:V4SF 1 "register_operand" "")
+	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)])))]
+  "TARGET_SSE")
+
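
The new vec_extract_even/odd expanders above give the vectorizer named patterns for deinterleaving: the result is the even- (or odd-) numbered elements of the two inputs laid end to end, exactly the (parallel ...) selectors spelled out in the RTL. A scalar C model of the 4-element case, using V4SI for concreteness (illustration only):

/* Scalar model of vec_extract_even/odd on a 4-element mode: select the
   even (or odd) indexed elements of the 8-element concatenation.  */
static void
vec_extract_even4 (int dst[4], const int a[4], const int b[4])
{
  int cat[8], i;
  for (i = 0; i < 4; i++) { cat[i] = a[i]; cat[i + 4] = b[i]; }
  for (i = 0; i < 4; i++)
    dst[i] = cat[2 * i];        /* indices 0, 2, 4, 6 */
}

static void
vec_extract_odd4 (int dst[4], const int a[4], const int b[4])
{
  int cat[8], i;
  for (i = 0; i < 4; i++) { cat[i] = a[i]; cat[i + 4] = b[i]; }
  for (i = 0; i < 4; i++)
    dst[i] = cat[2 * i + 1];    /* indices 1, 3, 5, 7 */
}
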
"TARGET_SSE2") + +(define_expand "vec_interleave_lowv2df" + [(set (match_operand:V2DF 0 "register_operand" "") + (vec_select:V2DF + (vec_concat:V4DF + (match_operand:V2DF 1 "register_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")) + (parallel [(const_int 0) + (const_int 2)])))] + "TARGET_SSE2") + (define_insn "sse2_packsswb" [(set (match_operand:V16QI 0 "register_operand" "=x") (vec_concat:V16QI @@ -4685,7 +4789,7 @@ "") (define_insn "*sse2_storeq_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r") (vec_select:DI (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o") (parallel [(const_int 0)])))] @@ -4848,26 +4952,25 @@ (set_attr "mode" "TI,V4SF,V2SF")]) (define_insn "vec_concatv2di" - [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x") (vec_concat:V2DI - (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m") - (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))] + (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0") + (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))] "!TARGET_64BIT && TARGET_SSE" "@ movq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1} punpcklqdq\t{%2, %0|%0, %2} movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov") - (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")]) + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "mode" "TI,TI,TI,V4SF,V2SF")]) (define_insn "*vec_concatv2di_rex64_sse4_1" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x") (vec_concat:V2DI - (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m") - (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))] + (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0") + (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))] "TARGET_64BIT && TARGET_SSE4_1" "@ pinsrq\t{$0x1, %2, %0|%0, %2, 0x1} @@ -4876,17 +4979,16 @@ movq2dq\t{%1, %0|%0, %1} punpcklqdq\t{%2, %0|%0, %2} movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov") - (set_attr "prefix_extra" "1,*,*,*,*,*,*,*") - (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "prefix_extra" "1,*,*,*,*,*,*") + (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")]) (define_insn "*vec_concatv2di_rex64_sse" - [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x") + [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x") (vec_concat:V2DI - (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m") - (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))] + (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0") + (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))] "TARGET_64BIT && TARGET_SSE" "@ movq\t{%1, %0|%0, %1} @@ -4894,10 +4996,9 @@ movq2dq\t{%1, %0|%0, %1} punpcklqdq\t{%2, %0|%0, %2} movlhps\t{%2, %0|%0, %2} - movhps\t{%2, %0|%0, %2} - movlps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov") - (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) + movhps\t{%2, %0|%0, %2}" + [(set_attr "type" 
"ssemov,ssemov,ssemov,sselog,ssemov,ssemov") + (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) (define_expand "vec_unpacku_hi_v16qi" [(match_operand:V8HI 0 "register_operand" "") diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 new file mode 100644 index 00000000000..4c0c046dae6 --- /dev/null +++ b/gcc/config/i386/t-i386 @@ -0,0 +1,13 @@ +i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h \ + $(REAL_H) insn-config.h conditions.h output.h insn-codes.h \ + $(INSN_ATTR_H) $(FLAGS_H) $(C_COMMON_H) except.h $(FUNCTION_H) \ + $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \ + $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \ + $(TREE_GIMPLE_H) dwarf2.h $(DF_H) tm-constrs.h $(PARAMS_H) + +i386-c.o: $(srcdir)/config/i386/i386-c.c \ + $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(TM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \ + $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/i386-c.c diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c index 7fdd89471a8..8ef79058a7d 100644 --- a/gcc/config/i386/winnt.c +++ b/gcc/config/i386/winnt.c @@ -531,16 +531,16 @@ i386_pe_asm_output_aligned_decl_common (FILE *stream, tree decl, /* Mark a function appropriately. This should only be called for functions for which we are not emitting COFF debugging information. FILE is the assembler output file, NAME is the name of the - function, and PUBLIC is nonzero if the function is globally + function, and PUB is nonzero if the function is globally visible. */ void -i386_pe_declare_function_type (FILE *file, const char *name, int public) +i386_pe_declare_function_type (FILE *file, const char *name, int pub) { fprintf (file, "\t.def\t"); assemble_name (file, name); fprintf (file, ";\t.scl\t%d;\t.type\t%d;\t.endef\n", - public ? (int) C_EXT : (int) C_STAT, + pub ? (int) C_EXT : (int) C_STAT, (int) DT_FCN << N_BTSHFT); } |