author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-02-10 17:20:51 +0000
---|---|---
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-02-10 17:20:51 +0000
commit | 2d9d01985a7a7866916fafa19c5c296702e69714 (patch) |
tree | 259c095c65fc0c6279b7a17755b3f851f51babb3 | /gcc/config
parent | c8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff) |
download | gcc-2d9d01985a7a7866916fafa19c5c296702e69714.tar.gz |
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9
svn merge -r227001:227400 ^/trunk ;
there is some gengtype issue before svn r228000... }}
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
70 files changed, 2524 insertions, 1038 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 0f4f2b97022..e3a90b5e4dd 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -886,30 +886,6 @@ typedef enum
   SIMD_ARG_STOP
 } builtin_simd_arg;

-/* Relayout the decl of a function arg.  Keep the RTL component the same,
-   as varasm.c ICEs.  It doesn't like reinitializing the RTL
-   on PARM decls.  Something like this needs to be done when compiling a
-   file without SIMD and then tagging a function with +simd and using SIMD
-   intrinsics in there.  The types will have been laid out assuming no SIMD,
-   so we want to re-lay them out.  */
-
-static void
-aarch64_relayout_simd_param (tree arg)
-{
-  tree argdecl = arg;
-  if (TREE_CODE (argdecl) == SSA_NAME)
-    argdecl = SSA_NAME_VAR (argdecl);
-
-  if (argdecl
-      && (TREE_CODE (argdecl) == PARM_DECL
-          || TREE_CODE (argdecl) == VAR_DECL))
-    {
-      rtx rtl = NULL_RTX;
-      rtl = DECL_RTL_IF_SET (argdecl);
-      relayout_decl (argdecl);
-      SET_DECL_RTL (argdecl, rtl);
-    }
-}

 static rtx
 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
@@ -940,7 +916,6 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
        {
          tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
          enum machine_mode mode = insn_data[icode].operand[opc].mode;
-         aarch64_relayout_simd_param (arg);
          op[opc] = expand_normal (arg);

          switch (thisarg)
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index a7b00f6975d..53bbef46eb2 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -20,19 +20,17 @@
 /* Pairs of instructions which can be fused. before including this file,
    define a macro:

-     AARCH64_FUSION_PAIR (name, internal_name, index_bit)
+     AARCH64_FUSION_PAIR (name, internal_name)

    Where:

      NAME is a string giving a friendly name for the instructions to fuse.
      INTERNAL_NAME gives the internal name suitable for appending to
-     AARCH64_FUSE_ to give an enum name.
-     INDEX_BIT is the bit to set in the bitmask of supported fusion
-     operations.  */
-
-AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0)
-AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1)
-AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2)
-AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3)
-AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4)
+     AARCH64_FUSE_ to give an enum name.  */
+
+AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK)
+AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD)
+AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
+AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
+AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1762cc8d58f..b261a0f7c3c 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -34,11 +34,6 @@
    should contain a whitespace-separated list of the strings in 'Features'
    that are required.  Their order is not important.  */

-/* V8 Architecture Extensions.
-   This list currently contains example extensions for CPUs that implement
-   AArch64, and therefore serves as a template for adding more CPUs in the
-   future.  */
-
 AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp")
 AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd")
 AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 0b09d49f670..ff1985137b3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -73,8 +73,12 @@ enum aarch64_symbol_context
    SYMBOL_SMALL_TLSGD
    SYMBOL_SMALL_TLSDESC
-   SYMBOL_SMALL_GOTTPREL
-   SYMBOL_TLSLE
+   SYMBOL_SMALL_TLSIE
+   SYMBOL_TINY_TLSIE
+   SYMBOL_TLSLE12
+   SYMBOL_TLSLE24
+   SYMBOL_TLSLE32
+   SYMBOL_TLSLE48
    Each of these represents a thread-local symbol, and corresponds to the
    thread local storage relocation operator for the symbol being referred to.
@@ -108,10 +112,14 @@ enum aarch64_symbol_type
   SYMBOL_SMALL_GOT_4G,
   SYMBOL_SMALL_TLSGD,
   SYMBOL_SMALL_TLSDESC,
-  SYMBOL_SMALL_GOTTPREL,
+  SYMBOL_SMALL_TLSIE,
   SYMBOL_TINY_ABSOLUTE,
   SYMBOL_TINY_GOT,
-  SYMBOL_TLSLE,
+  SYMBOL_TINY_TLSIE,
+  SYMBOL_TLSLE12,
+  SYMBOL_TLSLE24,
+  SYMBOL_TLSLE32,
+  SYMBOL_TLSLE48,
   SYMBOL_FORCE_TO_MEM
 };
@@ -201,41 +209,46 @@ struct tune_params
   unsigned int extra_tuning_flags;
 };

-#define AARCH64_FUSION_PAIR(x, name, index) \
-  AARCH64_FUSE_##name = (1 << index),
+#define AARCH64_FUSION_PAIR(x, name) \
+  AARCH64_FUSE_##name##_index,
 /* Supported fusion operations.  */
-enum aarch64_fusion_pairs
+enum aarch64_fusion_pairs_index
 {
-  AARCH64_FUSE_NOTHING = 0,
 #include "aarch64-fusion-pairs.def"
-
-/* Hacky macro to build AARCH64_FUSE_ALL.  The sequence below expands
-   to:
-     AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */
+  AARCH64_FUSE_index_END
+};
 #undef AARCH64_FUSION_PAIR
-#define AARCH64_FUSION_PAIR(x, name, y) \
-  | AARCH64_FUSE_##name
-  AARCH64_FUSE_ALL = 0
+
+#define AARCH64_FUSION_PAIR(x, name) \
+  AARCH64_FUSE_##name = (1u << AARCH64_FUSE_##name##_index),
+/* Supported fusion operations.  */
+enum aarch64_fusion_pairs
+{
+  AARCH64_FUSE_NOTHING = 0,
 #include "aarch64-fusion-pairs.def"
+  AARCH64_FUSE_ALL = (1u << AARCH64_FUSE_index_END) - 1
 };
 #undef AARCH64_FUSION_PAIR

-#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \
-  AARCH64_EXTRA_TUNE_##name = (1 << index),
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+  AARCH64_EXTRA_TUNE_##name##_index,
+/* Supported tuning flags indexes.  */
+enum aarch64_extra_tuning_flags_index
+{
+#include "aarch64-tuning-flags.def"
+  AARCH64_EXTRA_TUNE_index_END
+};
+#undef AARCH64_EXTRA_TUNING_OPTION
+
+
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+  AARCH64_EXTRA_TUNE_##name = (1u << AARCH64_EXTRA_TUNE_##name##_index),
 /* Supported tuning flags.  */
 enum aarch64_extra_tuning_flags
 {
   AARCH64_EXTRA_TUNE_NONE = 0,
 #include "aarch64-tuning-flags.def"
-
-/* Hacky macro to build the "all" flag mask.
-   Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc.  */
-#undef AARCH64_EXTRA_TUNING_OPTION
-#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \
-  | AARCH64_EXTRA_TUNE_##name
-  AARCH64_EXTRA_TUNE_ALL = 0
-#include "aarch64-tuning-flags.def"
+  AARCH64_EXTRA_TUNE_ALL = (1u << AARCH64_EXTRA_TUNE_index_END) - 1
 };
 #undef AARCH64_EXTRA_TUNING_OPTION
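[Editorial note] The hunk above replaces hand-maintained `index_bit` arguments with a two-pass expansion of the same `.def` file: the first inclusion builds an enum of consecutive indexes, the second turns each index into a bit, and the ALL mask falls out of the index count. A minimal, standalone sketch of the idiom (hypothetical FEATURE names, not part of the patch):

```c
/* Sketch of the two-pass X-macro idiom used by the hunk above.
   features.def equivalent: the list macro, included (expanded) twice.  */
#define FEATURES_DEF \
  FEATURE ("alpha", ALPHA) \
  FEATURE ("beta",  BETA)  \
  FEATURE ("gamma", GAMMA)

/* Pass 1: consecutive indexes 0..N-1 plus an END marker.  */
#define FEATURE(name, ident) FEAT_##ident##_index,
enum feature_index { FEATURES_DEF FEAT_index_END };
#undef FEATURE

/* Pass 2: one bit per index; the ALL mask is derived from the index
   count, so adding a line to the list updates everything at once.  */
#define FEATURE(name, ident) FEAT_##ident = (1u << FEAT_##ident##_index),
enum feature_bits
{
  FEAT_NOTHING = 0,
  FEATURES_DEF
  FEAT_ALL = (1u << FEAT_index_END) - 1   /* here: 0b111 == 7 */
};
#undef FEATURE
```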
@@ -310,12 +323,14 @@ rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
 bool aarch64_simd_mem_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
+std::string aarch64_get_extension_string_for_isa_flags (unsigned long);
 tree aarch64_fold_builtin (tree, int, tree *, bool);
 unsigned aarch64_dbx_register_number (unsigned);
 unsigned aarch64_trampoline_size (void);
 void aarch64_asm_output_labelref (FILE *, const char *);
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
+const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 01aaca83594..628386b5a1d 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -20,15 +20,13 @@
 /* Additional control over certain tuning parameters.  Before including
    this file, define a macro:

-     AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit)
+     AARCH64_EXTRA_TUNING_OPTION (name, internal_name)

    Where:

      NAME is a string giving a friendly name for the tuning flag.
      INTERNAL_NAME gives the internal name suitable for appending to
-     AARCH64_TUNE_ to give an enum name.
-     INDEX_BIT is the bit to set in the bitmask of supported tuning
-     flags.  */
+     AARCH64_TUNE_ to give an enum name.  */

-AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0)
+AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aa268aeff4d..bc612e47d4f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -150,7 +150,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_override_options_after_change (void);
 static bool aarch64_vector_mode_supported_p (machine_mode);
-static unsigned bit_count (unsigned HOST_WIDE_INT);
 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                  const unsigned char *sel);
 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
@@ -172,7 +171,7 @@ struct aarch64_flag_desc
   unsigned int flag;
 };

-#define AARCH64_FUSION_PAIR(name, internal_name, y) \
+#define AARCH64_FUSION_PAIR(name, internal_name) \
   { name, AARCH64_FUSE_##internal_name },
 static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
 {
@@ -183,7 +182,7 @@ static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
 };
 #undef AARCH64_FUION_PAIR

-#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
+#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
   { name, AARCH64_EXTRA_TUNE_##internal_name },
 static const struct aarch64_flag_desc aarch64_tuning_flags[] =
 {
@@ -587,6 +586,29 @@ static const char * const aarch64_condition_codes[] =
   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 };

+/* Generate code to enable conditional branches in functions over 1 MiB.  */
+const char *
+aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
+                        const char * branch_format)
+{
+    rtx_code_label * tmp_label = gen_label_rtx ();
+    char label_buf[256];
+    char buffer[128];
+    ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
+                                 CODE_LABEL_NUMBER (tmp_label));
+    const char *label_ptr = targetm.strip_name_encoding (label_buf);
+    rtx dest_label = operands[pos_label];
+    operands[pos_label] = tmp_label;
+
+    snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
+    output_asm_insn (buffer, operands);
+
+    snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
+    operands[pos_label] = dest_label;
+    output_asm_insn (buffer, operands);
+    return "";
+}
+
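[Editorial note] The technique behind `aarch64_gen_far_branch`: B.cond (and CBZ/TBZ) have a limited displacement, so when the target is out of range the compiler emits the *inverted* condition branching over an unconditional B, which reaches much further. A standalone sketch of that decision (illustrative only; the ±1 MiB window and the 1048572 upper bound are taken from the `length` attributes added to aarch64.md below):

```c
/* Sketch: choose between the short conditional branch and the 8-byte
   inverted-branch sequence used for out-of-range targets.  */
#include <stdio.h>

#define BCOND_MIN (-1048576)  /* -2^20 bytes: B.cond reaches about +/-1 MiB */
#define BCOND_MAX   1048572   /* largest 4-byte-aligned forward offset      */

static void
emit_cond_branch (long offset, const char *cond, const char *inv_cond)
{
  if (offset >= BCOND_MIN && offset < BCOND_MAX)
    printf ("\tb.%s\ttarget\n", cond);          /* 4-byte near form */
  else
    {
      /* 8-byte far form: hop over an unconditional B (+/-128 MiB range). */
      printf ("\tb.%s\t.Lbcond0\n", inv_cond);
      printf ("\tb\ttarget\n");
      printf (".Lbcond0:\n");
    }
}

int
main (void)
{
  emit_cond_branch (64, "eq", "ne");        /* near: single b.eq   */
  emit_cond_branch (4L << 20, "eq", "ne");  /* far:  b.ne over b   */
  return 0;
}
```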
 void
 aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
 {
@@ -931,7 +953,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
         The generate instruction sequence for accessing global variable
         is:

-        ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
+          ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

         Only one instruction needed. But we must initialize
         pic_offset_table_rtx properly.  We generate initialize insn for
         every global access, and allow CSE to remove all redundant.

         The final instruction sequences will look like the following
         for multiply global variables access.

-        adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
+          adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

-        ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
-        ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
-        ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
-        ... */
+          ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
+          ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
+          ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
+          ...  */

      rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      crtl->uses_pic_offset_table = 1;
@@ -1081,7 +1103,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
        return;
      }

-    case SYMBOL_SMALL_GOTTPREL:
+    case SYMBOL_SMALL_TLSIE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
@@ -1115,14 +1137,43 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
        return;
      }

-    case SYMBOL_TLSLE:
+    case SYMBOL_TLSLE12:
+    case SYMBOL_TLSLE24:
+    case SYMBOL_TLSLE32:
+    case SYMBOL_TLSLE48:
      {
+       machine_mode mode = GET_MODE (dest);
        rtx tp = aarch64_load_tp (NULL);

-       if (GET_MODE (dest) != Pmode)
-         tp = gen_lowpart (GET_MODE (dest), tp);
+       if (mode != Pmode)
+         tp = gen_lowpart (mode, tp);
+
+       switch (type)
+         {
+         case SYMBOL_TLSLE12:
+           emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
+                      (dest, tp, imm));
+           break;
+         case SYMBOL_TLSLE24:
+           emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
+                      (dest, tp, imm));
+           break;
+         case SYMBOL_TLSLE32:
+           emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
+                      (dest, imm));
+           emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+                      (dest, dest, tp));
+           break;
+         case SYMBOL_TLSLE48:
+           emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
+                      (dest, imm));
+           emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+                      (dest, dest, tp));
+           break;
+         default:
+           gcc_unreachable ();
+         }

-       emit_insn (gen_tlsle (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }
@@ -1131,6 +1182,31 @@
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

+    case SYMBOL_TINY_TLSIE:
+      {
+       machine_mode mode = GET_MODE (dest);
+       rtx tp = aarch64_load_tp (NULL);
+
+       if (mode == ptr_mode)
+         {
+           if (mode == DImode)
+             emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
+           else
+             {
+               tp = gen_lowpart (mode, tp);
+               emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
+             }
+         }
+       else
+         {
+           gcc_assert (mode == Pmode);
+           emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
+         }
+
+       set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+       return;
+      }
+
    default:
      gcc_unreachable ();
    }
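[Editorial note] Read together with the `tlsle12`/`tlsle24`/`tlsle32`/`tlsle48` patterns added to aarch64.md further down, the new `-mtls-size=` value trades TLS segment reach for local-exec sequence length. A sketch of that trade-off, with the instruction counts read off the new patterns (illustrative, not GCC code):

```c
/* How many instructions the local-exec TLS address sequence needs for a
   given -mtls-size, per the tlsle* patterns in this patch.  */
static int
tlsle_insn_count (int tls_size)
{
  switch (tls_size)
    {
    case 12: return 1;  /* add  Rd, tp, #:tprel_lo12:sym                   */
    case 24: return 2;  /* add #:tprel_hi12:, lsl #12; add #:tprel_lo12_nc: */
    case 32: return 3;  /* movz #:tprel_g1:; movk #:tprel_g0_nc:; add tp    */
    case 48: return 4;  /* movz g2; movk g1_nc; movk g0_nc; add tp          */
    default: return -1; /* rejected by the aarch64.opt Enum below           */
    }
}
```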
@@ -1661,10 +1737,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)

     case SYMBOL_SMALL_TLSGD:
     case SYMBOL_SMALL_TLSDESC:
-    case SYMBOL_SMALL_GOTTPREL:
+    case SYMBOL_SMALL_TLSIE:
     case SYMBOL_SMALL_GOT_28K:
     case SYMBOL_SMALL_GOT_4G:
     case SYMBOL_TINY_GOT:
+    case SYMBOL_TINY_TLSIE:
        if (offset != const0_rtx)
          {
            gcc_assert(can_create_pseudo_p ());
@@ -1677,7 +1754,10 @@

     case SYMBOL_SMALL_ABSOLUTE:
     case SYMBOL_TINY_ABSOLUTE:
-    case SYMBOL_TLSLE:
+    case SYMBOL_TLSLE12:
+    case SYMBOL_TLSLE24:
+    case SYMBOL_TLSLE32:
+    case SYMBOL_TLSLE48:
        aarch64_load_symref_appropriately (dest, imm, sty);
        return;

@@ -4163,19 +4243,6 @@ aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
   return aarch64_const_vec_all_same_in_range_p (x, val, val);
 }

-static unsigned
-bit_count (unsigned HOST_WIDE_INT value)
-{
-  unsigned count = 0;
-
-  while (value)
-    {
-      count++;
-      value &= value - 1;
-    }
-
-  return count;
-}

 /* N Z C V.  */
 #define AARCH64_CC_V 1
@@ -4330,7 +4397,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
          return;
        }

-      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
+      asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
      break;

    case 'H':
@@ -4556,11 +4623,11 @@ aarch64_print_operand (FILE *f, rtx x, char code)
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

-       case SYMBOL_SMALL_GOTTPREL:
+       case SYMBOL_SMALL_TLSIE:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

-       case SYMBOL_TLSLE:
+       case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel:");
          break;
@@ -4589,11 +4656,15 @@
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

-       case SYMBOL_SMALL_GOTTPREL:
+       case SYMBOL_SMALL_TLSIE:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

-       case SYMBOL_TLSLE:
+       case SYMBOL_TLSLE12:
+         asm_fprintf (asm_out_file, ":tprel_lo12:");
+         break;
+
+       case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;
@@ -4601,6 +4672,10 @@
          asm_fprintf (asm_out_file, ":got:");
          break;

+       case SYMBOL_TINY_TLSIE:
+         asm_fprintf (asm_out_file, ":gottprel:");
+         break;
+
        default:
          break;
        }
@@ -4611,7 +4686,7 @@
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
-       case SYMBOL_TLSLE:
+       case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;
        default:
@@ -7506,6 +7581,40 @@ aarch64_parse_one_override_token (const char* token,
   return;
 }

+/* A checking mechanism for the implementation of the tls size.  */
+
+static void
+initialize_aarch64_tls_size (struct gcc_options *opts)
+{
+  if (aarch64_tls_size == 0)
+    aarch64_tls_size = 24;
+
+  switch (opts->x_aarch64_cmodel_var)
+    {
+    case AARCH64_CMODEL_TINY:
+      /* Both the default and maximum TLS size allowed under tiny is 1M which
+        needs two instructions to address, so we clamp the size to 24.  */
+      if (aarch64_tls_size > 24)
+       aarch64_tls_size = 24;
+      break;
+    case AARCH64_CMODEL_SMALL:
+      /* The maximum TLS size allowed under small is 4G.  */
+      if (aarch64_tls_size > 32)
+       aarch64_tls_size = 32;
+      break;
+    case AARCH64_CMODEL_LARGE:
+      /* The maximum TLS size allowed under large is 16E.
+        FIXME: 16E should be 64bit, we only support 48bit offset now.  */
+      if (aarch64_tls_size > 48)
+       aarch64_tls_size = 48;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return;
+}
+
 /* Parse STRING looking for options in the format:
     string     :: option:string
     option     :: name=substring
@@ -7598,6 +7707,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
    }

  initialize_aarch64_code_model (opts);
+ initialize_aarch64_tls_size (opts);

  aarch64_override_options_after_change_1 (opts);
 }
@@ -7904,20 +8014,6 @@ initialize_aarch64_code_model (struct gcc_options *opts)
     aarch64_cmodel = opts->x_aarch64_cmodel_var;
 }

-/* Print to F the architecture features specified by ISA_FLAGS.  */
-
-static void
-aarch64_print_extension (FILE *f, unsigned long isa_flags)
-{
-  const struct aarch64_option_extension *opt = NULL;
-
-  for (opt = all_extensions; opt->name != NULL; opt++)
-    if ((isa_flags & opt->flags_on) == opt->flags_on)
-      asm_fprintf (f, "+%s", opt->name);
-
-  asm_fprintf (f, "\n");
-}
-
 /* Implement TARGET_OPTION_SAVE.  */

 static void
@@ -7950,10 +8046,12 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
    = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  unsigned long isa_flags = ptr->x_aarch64_isa_flags;
  const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
+  std::string extension
+    = aarch64_get_extension_string_for_isa_flags (isa_flags);

  fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
-  fprintf (file, "%*sselected arch = %s", indent, "", arch->name);
-  aarch64_print_extension (file, isa_flags);
+  fprintf (file, "%*sselected arch = %s%s\n", indent, "",
+          arch->name, extension.c_str ());
 }

 static GTY(()) tree aarch64_previous_fndecl;
@@ -8013,6 +8111,23 @@ aarch64_set_current_function (tree fndecl)
            = save_target_globals_default_opts ();
        }
    }
+
+  if (!fndecl)
+    return;
+
+  /* If we turned on SIMD make sure that any vector parameters are re-laid out
+     so that they use proper vector modes.  */
+  if (TARGET_SIMD)
+    {
+      tree parms = DECL_ARGUMENTS (fndecl);
+      for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
+       {
+         if (TREE_CODE (parms) == PARM_DECL
+             && VECTOR_TYPE_P (TREE_TYPE (parms))
+             && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
+           relayout_decl (parms);
+       }
+    }
 }
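[Editorial note] This hunk is the replacement for the per-argument `aarch64_relayout_simd_param` hack deleted from aarch64-builtins.c at the top of this diff: parameter decls are now re-laid out once, when the function's target options are switched in. A user-level illustration of the situation it handles (hypothetical example and flags, not from the patch):

```c
/* Assumed scenario: the file is compiled with SIMD disabled (e.g. with
   -march=armv8-a+nosimd) and one function re-enables it.  The vector
   parameters were laid out before SIMD was turned on, so
   aarch64_set_current_function re-lays them out to give them a real
   vector mode (V4SImode) rather than the no-SIMD layout.  */
typedef int v4si __attribute__ ((vector_size (16)));

__attribute__ ((target ("+simd")))
v4si
add4 (v4si a, v4si b)
{
  return a + b;   /* with +simd this can be a single NEON add */
}
```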
 /* Enum describing the various ways we can handle attributes.
@@ -8683,10 +8798,26 @@ aarch64_classify_tls_symbol (rtx x)
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
-      return SYMBOL_SMALL_GOTTPREL;
+      switch (aarch64_cmodel)
+       {
+       case AARCH64_CMODEL_TINY:
+       case AARCH64_CMODEL_TINY_PIC:
+         return SYMBOL_TINY_TLSIE;
+       default:
+         return SYMBOL_SMALL_TLSIE;
+       }

    case TLS_MODEL_LOCAL_EXEC:
-      return SYMBOL_TLSLE;
+      if (aarch64_tls_size == 12)
+       return SYMBOL_TLSLE12;
+      else if (aarch64_tls_size == 24)
+       return SYMBOL_TLSLE24;
+      else if (aarch64_tls_size == 32)
+       return SYMBOL_TLSLE32;
+      else if (aarch64_tls_size == 48)
+       return SYMBOL_TLSLE48;
+      else
+       gcc_unreachable ();

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
@@ -9893,31 +10024,10 @@ sizetochar (int size)
 static bool
 aarch64_vect_float_const_representable_p (rtx x)
 {
-  int i = 0;
-  REAL_VALUE_TYPE r0, ri;
-  rtx x0, xi;
-
-  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
-    return false;
-
-  x0 = CONST_VECTOR_ELT (x, 0);
-  if (!CONST_DOUBLE_P (x0))
-    return false;
-
-  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
-
-  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
-    {
-      xi = CONST_VECTOR_ELT (x, i);
-      if (!CONST_DOUBLE_P (xi))
-       return false;
-
-      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
-      if (!REAL_VALUES_EQUAL (r0, ri))
-       return false;
-    }
-
-  return aarch64_float_const_representable_p (x0);
+  rtx elt;
+  return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
+         && const_vec_duplicate_p (x, &elt)
+         && aarch64_float_const_representable_p (elt));
 }

 /* Return true for valid and false for invalid.  */
@@ -10380,28 +10490,15 @@ aarch64_simd_dup_constant (rtx vals)
 {
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
-  int n_elts = GET_MODE_NUNITS (mode);
-  bool all_same = true;
  rtx x;
-  int i;
-
-  if (GET_CODE (vals) != CONST_VECTOR)
-    return NULL_RTX;
-
-  for (i = 1; i < n_elts; ++i)
-    {
-      x = CONST_VECTOR_ELT (vals, i);
-      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
-       all_same = false;
-    }

-  if (!all_same)
+  if (!const_vec_duplicate_p (vals, &x))
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
-  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
+  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
 }
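[Editorial note] Both hunks above switch open-coded element loops to the generic rtl helper `const_vec_duplicate_p`. Its observable behaviour is exactly what the deleted loops did, so it can be summarized by this sketch (simplified; the real helper lives in GCC's rtl headers):

```c
/* Behavioural sketch of const_vec_duplicate_p: true iff X is a
   CONST_VECTOR whose elements are all equal, storing the common
   element through ELT.  Mirrors the loop removed above.  */
static bool
const_vec_duplicate_p_sketch (rtx x, rtx *elt)
{
  if (GET_CODE (x) != CONST_VECTOR)
    return false;

  rtx first = CONST_VECTOR_ELT (x, 0);
  for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    if (!rtx_equal_p (CONST_VECTOR_ELT (x, i), first))
      return false;

  *elt = first;
  return true;
}
```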
@@ -10677,8 +10774,11 @@ aarch64_declare_function_name (FILE *stream, const char* name,
   const struct processor *this_arch
     = aarch64_get_arch (targ_options->x_explicit_arch);

-  asm_fprintf (asm_out_file, "\t.arch %s", this_arch->name);
-  aarch64_print_extension (asm_out_file, targ_options->x_aarch64_isa_flags);
+  unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
+  std::string extension
+    = aarch64_get_extension_string_for_isa_flags (isa_flags);
+  asm_fprintf (asm_out_file, "\t.arch %s%s\n",
+              this_arch->name, extension.c_str ());

   /* Print the cpu name we're tuning for in the comments, might be
      useful to readers of the generated asm.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1be78fc16dd..1e5f5dbd4fa 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -887,18 +887,18 @@ extern enum aarch64_code_model aarch64_cmodel;
   {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
   {"cpu",  "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" },

-#define BIG_LITTLE_SPEC \
-   " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}"
+#define MCPU_TO_MARCH_SPEC \
+   " %{mcpu=*:-march=%:rewrite_mcpu(%{mcpu=*:%*})}"

 extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
-#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \
+#define MCPU_TO_MARCH_SPEC_FUNCTIONS \
   { "rewrite_mcpu", aarch64_rewrite_mcpu },

 #if defined(__aarch64__)
 extern const char *host_detect_local_cpu (int argc, const char **argv);
 # define EXTRA_SPEC_FUNCTIONS                                          \
   { "local_cpu_detect", host_detect_local_cpu },                       \
-  BIG_LITTLE_CPU_SPEC_FUNCTIONS
+  MCPU_TO_MARCH_SPEC_FUNCTIONS

 # define MCPU_MTUNE_NATIVE_SPECS                                       \
   " %{march=native:%<march=native %:local_cpu_detect(arch)}"           \
@@ -906,11 +906,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
   " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
 #else
 # define MCPU_MTUNE_NATIVE_SPECS ""
-# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
+# define EXTRA_SPEC_FUNCTIONS MCPU_TO_MARCH_SPEC_FUNCTIONS
 #endif

 #define ASM_CPU_SPEC \
-   BIG_LITTLE_SPEC
+   MCPU_TO_MARCH_SPEC

 #define EXTRA_SPECS                                            \
   { "asm_cpu_spec", ASM_CPU_SPEC }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 35255e91a95..25229824fb5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -90,6 +90,7 @@
     UNSPEC_GOTSMALLPIC28K
     UNSPEC_GOTSMALLTLS
     UNSPEC_GOTTINYPIC
+    UNSPEC_GOTTINYTLS
     UNSPEC_LD1
     UNSPEC_LD2
     UNSPEC_LD2_DUP
@@ -117,7 +118,10 @@
     UNSPEC_ST4_LANE
     UNSPEC_TLS
     UNSPEC_TLSDESC
-    UNSPEC_TLSLE
+    UNSPEC_TLSLE12
+    UNSPEC_TLSLE24
+    UNSPEC_TLSLE32
+    UNSPEC_TLSLE48
     UNSPEC_USHL_2S
     UNSPEC_VSTRUCTDUMMY
     UNSPEC_SP_SET
@@ -181,6 +185,13 @@
     (const_string "no")
        ] (const_string "yes")))

+;; Attribute that specifies whether we are dealing with a branch to a
+;; label that is far away, i.e. further away than the maximum/minimum
+;; representable in a signed 21-bits number.
+;; 0 :=: no
+;; 1 :=: yes
+(define_attr "far_branch" "" (const_int 0))
+
 ;; -------------------------------------------------------------------
 ;; Pipeline descriptions and scheduling
 ;; -------------------------------------------------------------------
@@ -308,8 +319,23 @@
                           (label_ref (match_operand 2 "" ""))
                           (pc)))]
  ""
-  "b%m0\\t%l2"
-  [(set_attr "type" "branch")]
+  {
+    if (get_attr_length (insn) == 8)
+      return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t");
+    else
+      return "b%m0\\t%l2";
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+                     (const_int 4)
+                     (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+                     (const_int 0)
+                     (const_int 1)))]
 )

 (define_expand "casesi"
@@ -488,9 +514,23 @@
                      (label_ref (match_operand 1 "" ""))
                      (pc)))]
  ""
-  "<cbz>\\t%<w>0, %l1"
-  [(set_attr "type" "branch")]
-
+  {
+    if (get_attr_length (insn) == 8)
+      return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
+    else
+      return "<cbz>\\t%<w>0, %l1";
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+       (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+                     (const_int 4)
+                     (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+                     (const_int 0)
+                     (const_int 1)))]
 )

 (define_insn "*tb<optab><mode>1"
@@ -506,8 +546,14 @@
  {
    if (get_attr_length (insn) == 8)
      {
-       operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-       return "tst\t%<w>0, %1\;<bcond>\t%l2";
+       if (get_attr_far_branch (insn) == 1)
+         return aarch64_gen_far_branch (operands, 2, "Ltb",
+                                        "<inv_tb>\\t%<w>0, %1, ");
+       else
+         {
+           operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+           return "tst\t%<w>0, %1\;<bcond>\t%l2";
+         }
      }
    else
      return "<tbz>\t%<w>0, %1, %l2";
@@ -517,7 +563,13 @@
       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
                          (lt (minus (match_dup 2) (pc)) (const_int 32764)))
                     (const_int 4)
-                    (const_int 8)))]
+                    (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+                     (const_int 0)
+                     (const_int 1)))]
+
 )

 (define_insn "*cb<optab><mode>1"
@@ -530,12 +582,18 @@
  {
    if (get_attr_length (insn) == 8)
      {
-       char buf[64];
-       uint64_t val = ((uint64_t ) 1)
-                       << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
-       sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
-       output_asm_insn (buf, operands);
-       return "<bcond>\t%l1";
+       if (get_attr_far_branch (insn) == 1)
+         return aarch64_gen_far_branch (operands, 1, "Ltb",
+                                        "<inv_tb>\\t%<w>0, <sizem1>, ");
+       else
+         {
+           char buf[64];
+           uint64_t val = ((uint64_t) 1)
+             << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
+           sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
+           output_asm_insn (buf, operands);
+           return "<bcond>\t%l1";
+         }
      }
    else
      return "<tbz>\t%<w>0, <sizem1>, %l1";
@@ -545,7 +603,12 @@
       (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
                          (lt (minus (match_dup 1) (pc)) (const_int 32764)))
                     (const_int 4)
-                    (const_int 8)))]
+                    (const_int 8)))
+   (set (attr "far_branch")
+       (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+                          (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+                     (const_int 0)
+                     (const_int 1)))]
 )

 ;; -------------------------------------------------------------------
@@ -768,7 +831,7 @@
 {
   int i;

-  emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+  emit_call_insn (gen_call (operands[0], const0_rtx, NULL));

   for (i = 0; i < XVECLEN (operands[2], 0); i++)
     {
@@ -3923,6 +3986,16 @@
   [(set_attr "type" "bfm")]
 )

+(define_insn "*aarch64_bfi<GPI:mode><ALLX:mode>4"
+  [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
+                         (match_operand 1 "const_int_operand" "n")
+                         (match_operand 2 "const_int_operand" "n"))
+       (zero_extend:GPI (match_operand:ALLX 3 "register_operand" "r")))]
+  "UINTVAL (operands[1]) <= <ALLX:sizen>"
+  "bfi\\t%<GPI:w>0, %<GPI:w>3, %2, %1"
+  [(set_attr "type" "bfm")]
+)
+
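[Editorial note] The new `*aarch64_bfi` pattern matches a `zero_extract` destination written from a zero-extended register, i.e. a bit-field insert, allowing a single BFI instruction. A hypothetical source-level illustration (whether it actually matches depends on what combine produces; exact codegen is not guaranteed by the patch):

```c
/* Inserting the zero-extended low byte of B into bits [15:8] of ACC;
   at -O2 on AArch64 this shape can become: bfi x0, x1, #8, #8.  */
unsigned long
insert_byte (unsigned long acc, unsigned char b)
{
  acc &= ~0xff00ul;               /* clear the destination field   */
  acc |= (unsigned long) b << 8;  /* insert the zero-extended byte */
  return acc;
}
```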
 (define_insn "*extr_insv_lower_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
                          (match_operand 1 "const_int_operand" "n")
@@ -4512,31 +4585,72 @@
    (set_attr "length" "8")]
 )

-(define_expand "tlsle"
-  [(set (match_operand 0 "register_operand" "=r")
-       (unspec [(match_operand 1 "register_operand" "r")
-                (match_operand 2 "aarch64_tls_le_symref" "S")]
-                UNSPEC_TLSLE))]
+(define_insn "tlsie_tiny_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=&r")
+       (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")
+                    (match_operand:PTR 2 "register_operand" "r")]
+                   UNSPEC_GOTTINYTLS))]
  ""
-{
-  machine_mode mode = GET_MODE (operands[0]);
-  emit_insn ((mode == DImode
-             ? gen_tlsle_di
-             : gen_tlsle_si) (operands[0], operands[1], operands[2]));
-  DONE;
-})
+  "ldr\\t%<w>0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "8")]
+)

-(define_insn "tlsle_<mode>"
+(define_insn "tlsie_tiny_sidi"
+  [(set (match_operand:DI 0 "register_operand" "=&r")
+       (zero_extend:DI
+         (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")
+                     (match_operand:DI 2 "register_operand" "r")
+                     ]
+                    UNSPEC_GOTTINYTLS)))]
+  ""
+  "ldr\\t%w0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "8")]
+)
+
+(define_insn "tlsle12_<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
-       (unspec:P [(match_operand:P 1 "register_operand" "r")
-                  (match_operand 2 "aarch64_tls_le_symref" "S")]
-                  UNSPEC_TLSLE))]
+       (unspec:P [(match_operand:P 1 "register_operand" "r")
+                  (match_operand 2 "aarch64_tls_le_symref" "S")]
+                 UNSPEC_TLSLE12))]
  ""
-  "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+  "add\\t%<w>0, %<w>1, #%L2";
   [(set_attr "type" "alu_sreg")
+   (set_attr "length" "4")]
+)
+
+(define_insn "tlsle24_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+       (unspec:P [(match_operand:P 1 "register_operand" "r")
+                  (match_operand 2 "aarch64_tls_le_symref" "S")]
+                 UNSPEC_TLSLE24))]
+  ""
+  "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+  [(set_attr "type" "multiple")
    (set_attr "length" "8")]
 )

+(define_insn "tlsle32_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+       (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+                 UNSPEC_TLSLE32))]
+  ""
+  "movz\\t%<w>0, #:tprel_g1:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "8")]
+)
+
+(define_insn "tlsle48_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+       (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+                 UNSPEC_TLSLE48))]
+  ""
+  "movz\\t%<w>0, #:tprel_g2:%1\;movk\\t%<w>0, #:tprel_g1_nc:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "12")]
+)
+
 (define_insn "tlsdesc_small_<mode>"
   [(set (reg:PTR R0_REGNUM)
        (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 37c2c509fe2..8642bdb74f3 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -96,6 +96,25 @@ mtls-dialect=
 Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) Save
 Specify TLS dialect

+mtls-size=
+Target RejectNegative Joined Var(aarch64_tls_size) Enum(aarch64_tls_size)
+Specifies bit size of immediate TLS offsets.  Valid values are 12, 24, 32, 48.
+
+Enum
+Name(aarch64_tls_size) Type(int)
+
+EnumValue
+Enum(aarch64_tls_size) String(12) Value(12)
+
+EnumValue
+Enum(aarch64_tls_size) String(24) Value(24)
+
+EnumValue
+Enum(aarch64_tls_size) String(32) Value(32)
+
+EnumValue
+Enum(aarch64_tls_size) String(48) Value(48)
+
 march=
 Target RejectNegative ToLower Joined Var(aarch64_arch_string)
 -march=ARCH    Use features of architecture ARCH
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b8a45d1d6ed..475aa6e6d37 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -817,9 +817,15 @@
 ;; Emit cbz/cbnz depending on comparison type.
 (define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])

+;; Emit inverted cbz/cbnz depending on comparison type.
+(define_code_attr inv_cb [(eq "cbnz") (ne "cbz") (lt "cbz") (ge "cbnz")])
+
 ;; Emit tbz/tbnz depending on comparison type.
 (define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])

+;; Emit inverted tbz/tbnz depending on comparison type.
+(define_code_attr inv_tb [(eq "tbnz") (ne "tbz") (lt "tbz") (ge "tbnz")])
+
 ;; Max/min attributes.
 (define_code_attr maxmin [(smax "max")
                          (smin "min")
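[Editorial note] The `inv_cb`/`inv_tb` code attributes exist to feed `aarch64_gen_far_branch`: the far form must branch on the *opposite* condition over the unconditional jump. The mapping, restated as a plain C table directly from the iterators.md hunk above:

```c
/* cb/inv_cb mnemonic pairs from iterators.md: for each RTL comparison
   against zero, the normal compare-and-branch and its inverse.  */
struct cb_map { const char *code; const char *cb; const char *inv_cb; };

static const struct cb_map cb_table[] = {
  { "eq", "cbz",  "cbnz" },
  { "ne", "cbnz", "cbz"  },
  { "lt", "cbnz", "cbz"  },  /* lt/ge test the sign bit (tbz/tbnz forms) */
  { "ge", "cbz",  "cbnz" },
};
```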
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index 914daf33a5a..cf9636862f2 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -39,7 +39,7 @@
 (define_insn_reservation "thunderx_shift" 1
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
+       (eq_attr "type" "bfm,extend,shift_imm,shift_reg,rbit,rev"))
   "thunderx_pipe0 | thunderx_pipe1")

@@ -66,12 +66,18 @@
        (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
   "thunderx_pipe1 + thunderx_mult")

-;; Multiply high instructions take an extra cycle and cause the muliply unit to
-;; be busy for an extra cycle.
+;; crcb,crch,crcw is 4 cycles and can only happen on pipe 1

-;(define_insn_reservation "thunderx_mul_high" 5
+(define_insn_reservation "thunderx_crc32" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "crc"))
+  "thunderx_pipe1 + thunderx_mult")
+
+;; crcx is 5 cycles and only happen on pipe 1
+;(define_insn_reservation "thunderx_crc64" 5
 ;  (and (eq_attr "tune" "thunderx")
-;       (eq_attr "type" "smull,umull"))
+;       (eq_attr "type" "crc")
+;       (eq_attr "mode" "DI"))
 ;  "thunderx_pipe1 + thunderx_mult")

 (define_insn_reservation "thunderx_div32" 22
@@ -97,6 +103,11 @@
        (eq_attr "type" "store2"))
   "thunderx_pipe0 + thunderx_pipe1")

+;; Prefetch are single issued
+;(define_insn_reservation "thunderx_prefetch" 1
+;  (and (eq_attr "tune" "thunderx")
+;       (eq_attr "type" "prefetch"))
+;  "thunderx_pipe0 + thunderx_pipe1")

 ;; loads (and load pairs) from L1 take 3 cycles in pipe 0
 (define_insn_reservation "thunderx_load" 3
@@ -121,10 +132,21 @@
        (eq_attr "type" "fconsts,fconstd"))
   "thunderx_pipe1")

-;; Moves between fp are 2 cycles including min/max/select/abs/neg
+;; Moves between fp are 2 cycles including min/max
 (define_insn_reservation "thunderx_fmov" 2
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
+       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
+  "thunderx_pipe1")
+
+;; ABS, and NEG are 1 cycle
+(define_insn_reservation "thunderx_fabs" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "ffariths,ffarithd"))
+  "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fcsel" 3
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fcsel"))
   "thunderx_pipe1")

 (define_insn_reservation "thunderx_fmovgpr" 2
@@ -132,6 +154,11 @@
        (eq_attr "type" "f_mrc, f_mcr"))
   "thunderx_pipe1")

+(define_insn_reservation "thunderx_fcmp" 3
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fcmps,fcmpd"))
+  "thunderx_pipe1")
+
 (define_insn_reservation "thunderx_fmul" 6
   (and (eq_attr "tune" "thunderx")
        (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
@@ -152,21 +179,21 @@
        (eq_attr "type" "fsqrts"))
   "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")

-(define_insn_reservation "thunderx_fsqrtd" 28
+(define_insn_reservation "thunderx_fsqrtd" 31
   (and (eq_attr "tune" "thunderx")
        (eq_attr "type" "fsqrtd"))
-  "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")

 ;; The rounding conversion inside fp is 4 cycles
 (define_insn_reservation "thunderx_frint" 4
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "f_rints,f_rintd"))
+       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
   "thunderx_pipe1")

 ;; Float to integer with a move from int to/from float is 6 cycles
 (define_insn_reservation "thunderx_f_cvt" 6
   (and (eq_attr "tune" "thunderx")
-       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
   "thunderx_pipe1")

 ;; FP/SIMD load/stores happen in pipe 0
@@ -184,9 +211,12 @@
   "thunderx_pipe0+thunderx_pipe1")

 ;; FP/SIMD Stores takes one cycle in pipe 0
+;; ST1 with one registers either multiple structures or single structure is
+;; also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q")) + (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \ + neon_store1_one_lane, neon_store1_one_lane_q")) "thunderx_pipe0") ;; 64bit neon store pairs are single issue for one cycle @@ -201,24 +231,38 @@ (eq_attr "type" "neon_store1_2reg_q")) "(thunderx_pipe0 + thunderx_pipe1)*2") +;; LD1R/LD1 (with a single struct) takes 6 cycles and issued in pipe0 +(define_insn_reservation "thunderx_neon_ld1" 6 + (and (eq_attr "tune" "thunderx") + (eq_attr "type" "neon_load1_all_lanes")) + "thunderx_pipe0") ;; SIMD/NEON (q forms take an extra cycle) +;; SIMD For ThunderX is 64bit wide, -;; Thunder simd move instruction types - 2/3 cycles +;; ThunderX simd move instruction types - 2/3 cycles +;; ThunderX dup, ins is the same +;; ThunderX SIMD fabs/fneg instruction types (define_insn_reservation "thunderx_neon_move" 2 (and (eq_attr "tune" "thunderx") (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \ - neon_fp_compare_d, neon_move")) + neon_fp_compare_d, neon_move, neon_dup, \ + neon_ins, neon_from_gp, neon_to_gp, \ + neon_abs, neon_neg, \ + neon_fp_neg_s, neon_fp_abs_s")) "thunderx_pipe1 + thunderx_simd") (define_insn_reservation "thunderx_neon_move_q" 3 (and (eq_attr "tune" "thunderx") (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \ - neon_fp_compare_d_q, neon_move_q")) + neon_fp_compare_d_q, neon_move_q, neon_dup_q, \ + neon_ins_q, neon_from_gp_q, neon_to_gp_q, \ + neon_abs_q, neon_neg_q, \ + neon_fp_neg_s_q, neon_fp_neg_d_q, \ + neon_fp_abs_s_q, neon_fp_abs_d_q")) "thunderx_pipe1 + thunderx_simd, thunderx_simd") - -;; Thunder simd simple/add instruction types - 4/5 cycles +;; ThunderX simd simple/add instruction types - 4/5 cycles (define_insn_reservation "thunderx_neon_add" 4 (and (eq_attr "tune" "thunderx") @@ -227,7 +271,9 @@ neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \ neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \ neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \ - neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d")) + neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \ + neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \ + neon_fp_reduc_minmax_s")) "thunderx_pipe1 + thunderx_simd") ;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect @@ -240,13 +286,74 @@ neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \ neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \ neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \ - neon_add_long, neon_sub_long")) + neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \ + neon_arith_acc_q, neon_rev_q, \ + neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q")) "thunderx_pipe1 + thunderx_simd, thunderx_simd") +;; Multiplies (float and integer) and shifts and permutes (except for TBL) and float conversions +;; are 6/7 cycles +(define_insn_reservation "thunderx_neon_mult" 6 + (and (eq_attr "tune" "thunderx") + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \ + neon_mla_b, neon_mla_h, neon_mla_s, \ + neon_mla_h_scalar, neon_mla_s_scalar, \ + neon_ext, neon_shift_imm, neon_permute, \ + neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \ + neon_sat_shift_reg, neon_shift_acc, \ + neon_mul_b, neon_mul_h, neon_mul_s, \ + neon_mul_h_scalar, neon_mul_s_scalar, \ + neon_fp_mul_s_scalar, 
+                       neon_fp_mla_s_scalar"))
+  "thunderx_pipe1 + thunderx_simd")
+
+(define_insn_reservation "thunderx_neon_mult_q" 7
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
+                       neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
+                       neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
+                       neon_ext_q, neon_shift_imm_q, neon_permute_q, \
+                       neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
+                       neon_sat_shift_reg_q, neon_shift_acc_q, \
+                       neon_shift_imm_long, \
+                       neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
+                       neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
+                       neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
+                       neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
+                       neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
+                       neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
+  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; AES[ED] is 5 cycles
+(define_insn_reservation "thunderx_crypto_aese" 5
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "crypto_aese"))
+  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

-;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
-(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
-(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
+;; AES{,I}MC is 3 cycles
+(define_insn_reservation "thunderx_crypto_aesmc" 3
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "crypto_aesmc"))
+  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes upper halve in the last cycle
+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+
+;; 64bit TBL is emulated and takes 160 cycles
+(define_insn_reservation "thunderx_tbl" 160
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_tbl1"))
+  "(thunderx_pipe1+thunderx_pipe0)*160")
+
+;; 128bit TBL is emulated and takes 320 cycles
+(define_insn_reservation "thunderx_tblq" 320
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_tbl1_q"))
+  "(thunderx_pipe1+thunderx_pipe0)*320")

 ;; Assume both pipes are needed for unknown and multiple-instruction
 ;; patterns.
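[Editorial note] For quick reference, the new and changed ThunderX latencies above, collected into one table (cycles as read off the reservations; my summary, not a vendor document):

```c
/* Rough summary of the thunderx.md reservations added/changed above.  */
struct latency { const char *what; int cycles; };

static const struct latency thunderx_changes[] = {
  { "crc (32-bit forms)",            4 },
  { "fp abs/neg (ffariths/ffarithd)", 1 },
  { "fcsel",                         3 },
  { "fcmp",                          3 },
  { "fsqrt double (was 28)",        31 },
  { "ld1r / ld1 single struct",      6 },
  { "neon mult/shift/permute (64b)", 6 },
  { "neon mult/shift/permute (128b)",7 },
  { "aese/aesd",                     5 },
  { "aesmc/aesimc",                  3 },
  { "tbl 64-bit (emulated)",       160 },
  { "tbl 128-bit (emulated)",      320 },
};
```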
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index ca07cc7052b..32bb36eec33 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -3110,7 +3110,7 @@ alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
     }

   tmp = gen_rtx_MEM (QImode, func);
-  tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
+  tmp = emit_call_insn (gen_call_value (reg, tmp, const0_rtx,
                                        const0_rtx, const0_rtx));
   CALL_INSN_FUNCTION_USAGE (tmp) = usage;
   RTL_CONST_CALL_P (tmp) = 1;
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 7626d3f0233..5068f60ad6c 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3646,7 +3646,7 @@
 {
   int i;

-  emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+  emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));

   for (i = 0; i < XVECLEN (operands[2], 0); i++)
     {
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index 3dafaa5fbc8..ddf6c3c330f 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -23,39 +23,40 @@

   The NAME is the name of the architecture, represented as a string
   constant.  The CORE is the identifier for a core representative of
-   this architecture.  ARCH is the architecture revision.  FLAGS are
-   the flags implied by the architecture.
+   this architecture.  ARCH is the architecture revision.  FLAGS is
+   the set of feature flags implied by the architecture.

   genopt.sh assumes no whitespace up to the first "," in each entry.  */

-ARM_ARCH("armv2", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv2a", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv3", arm6, 3, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3)
-ARM_ARCH("armv3m", arm7m, 3M, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M)
-ARM_ARCH("armv4", arm7tdmi, 4, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4)
+ARM_ARCH("armv2", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv2a", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv3", arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3))
+ARM_ARCH("armv3m", arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M))
+ARM_ARCH("armv4", arm7tdmi, 4, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4))
 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
    implementations that support it, so we will leave it out for now.  */
*/ -ARM_ARCH("armv4t", arm7tdmi, 4T, FL_CO_PROC | FL_FOR_ARCH4T) -ARM_ARCH("armv5", arm10tdmi, 5, FL_CO_PROC | FL_FOR_ARCH5) -ARM_ARCH("armv5t", arm10tdmi, 5T, FL_CO_PROC | FL_FOR_ARCH5T) -ARM_ARCH("armv5e", arm1026ejs, 5E, FL_CO_PROC | FL_FOR_ARCH5E) -ARM_ARCH("armv5te", arm1026ejs, 5TE, FL_CO_PROC | FL_FOR_ARCH5TE) -ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6) -ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J) -ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K) -ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z) -ARM_ARCH("armv6kz", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) -ARM_ARCH("armv6zk", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ) -ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2) -ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M) -ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M) -ARM_ARCH("armv7", cortexa8, 7, FL_CO_PROC | FL_FOR_ARCH7) -ARM_ARCH("armv7-a", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7A) -ARM_ARCH("armv7ve", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7VE) -ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R) -ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M) -ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM) -ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A) -ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A) -ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT) -ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2) +ARM_ARCH("armv4t", arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T)) +ARM_ARCH("armv5", arm10tdmi, 5, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5)) +ARM_ARCH("armv5t", arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5T)) +ARM_ARCH("armv5e", arm1026ejs, 5E, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5E)) +ARM_ARCH("armv5te", arm1026ejs, 5TE, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5TE)) +ARM_ARCH("armv6", arm1136js, 6, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6)) +ARM_ARCH("armv6j", arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6J)) +ARM_ARCH("armv6k", mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6K)) +ARM_ARCH("armv6z", arm1176jzs, 6Z, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6Z)) +ARM_ARCH("armv6kz", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ)) +ARM_ARCH("armv6zk", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ)) +ARM_ARCH("armv6t2", arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6T2)) +ARM_ARCH("armv6-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M)) +ARM_ARCH("armv6s-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M)) +ARM_ARCH("armv7", cortexa8, 7, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7)) +ARM_ARCH("armv7-a", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7A)) +ARM_ARCH("armv7ve", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7VE)) +ARM_ARCH("armv7-r", cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7R)) +ARM_ARCH("armv7-m", cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7M)) +ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7EM)) +ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A)) +ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)) +ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | 
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 030d8d19441..4391f17c655 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -769,13 +769,6 @@ arm_init_simd_builtin_types (void)
   int nelts = sizeof (arm_simd_types) / sizeof (arm_simd_types[0]);
   tree tdecl;

-  /* Initialize the HFmode scalar type.  */
-  arm_simd_floatHF_type_node = make_node (REAL_TYPE);
-  TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
-  layout_type (arm_simd_floatHF_type_node);
-  (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
-                                            "__builtin_neon_hf");
-
   /* Poly types are a world of their own.  In order to maintain legacy
      ABI, they get initialized using the old interface, and don't get
      an entry in our mangling table, consequently, they get default
@@ -823,6 +816,8 @@ arm_init_simd_builtin_types (void)
      mangling.  */

   /* Continue with standard types.  */
+  /* The __builtin_simd{64,128}_float16 types are kept private unless
+     we have a scalar __fp16 type.  */
   arm_simd_types[Float16x4_t].eltype = arm_simd_floatHF_type_node;
   arm_simd_types[Float32x2_t].eltype = float_type_node;
   arm_simd_types[Float32x4_t].eltype = float_type_node;
@@ -1106,10 +1101,11 @@ arm_init_neon_builtins (void)
 #undef NUM_DREG_TYPES
 #undef NUM_QREG_TYPES

-#define def_mbuiltin(MASK, NAME, TYPE, CODE)                           \
+#define def_mbuiltin(FLAGS, NAME, TYPE, CODE)                          \
   do                                                                   \
     {                                                                  \
-      if ((MASK) & insn_flags)                                         \
+      const arm_feature_set flags = FLAGS;                             \
+      if (ARM_FSET_CPU_SUBSET (flags, insn_flags))                     \
        {                                                               \
          tree bdecl;                                                   \
          bdecl = add_builtin_function ((NAME), (TYPE), (CODE),         \
@@ -1121,7 +1117,7 @@ arm_init_neon_builtins (void)

 struct builtin_description
 {
-  const unsigned int mask;
+  const arm_feature_set features;
   const enum insn_code icode;
   const char * const name;
   const enum arm_builtins code;
@@ -1132,11 +1128,13 @@ struct builtin_description
 static const struct builtin_description bdesc_2arg[] =
 {
 #define IWMMXT_BUILTIN(code, string, builtin) \
-  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+  { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, \
+    "__builtin_arm_" string, \
     ARM_BUILTIN_##builtin, UNKNOWN, 0 },

 #define IWMMXT2_BUILTIN(code, string, builtin) \
-  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
+  { ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), CODE_FOR_##code, \
+    "__builtin_arm_" string, \
     ARM_BUILTIN_##builtin, UNKNOWN, 0 },

 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
@@ -1219,10 +1217,12 @@ static const struct builtin_description bdesc_2arg[] =
 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)

 #define IWMMXT_BUILTIN2(code, builtin) \
-  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+  { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, NULL, \
+    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

 #define IWMMXT2_BUILTIN2(code, builtin) \
-  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+  { ARM_FSET_MAKE_CPU2 (FL_IWMMXT2), CODE_FOR_##code, NULL, \
+    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
@@ -1237,7 +1237,7 @@ static const struct builtin_description bdesc_2arg[] =

 #define FP_BUILTIN(L, U) \
-  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
+  {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
   UNKNOWN, 0},

 FP_BUILTIN (get_fpscr, GET_FPSCR)
@@ -1245,8 +1245,8 @@ static const struct builtin_description bdesc_2arg[] =
 #undef FP_BUILTIN

 #define CRC32_BUILTIN(L, U) \
-  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
-   UNKNOWN, 0},
+  {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, \
+   ARM_BUILTIN_##U, UNKNOWN, 0},
 CRC32_BUILTIN (crc32b, CRC32B)
 CRC32_BUILTIN (crc32h, CRC32H)
 CRC32_BUILTIN (crc32w, CRC32W)
@@ -1256,9 +1256,9 @@ static const struct builtin_description bdesc_2arg[] =
 #undef CRC32_BUILTIN

-#define CRYPTO_BUILTIN(L, U) \
-  {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
-   UNKNOWN, 0},
+#define CRYPTO_BUILTIN(L, U)                                      \
+  {ARM_FSET_EMPTY, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, \
+   ARM_BUILTIN_CRYPTO_##U, UNKNOWN, 0},
 #undef CRYPTO1
 #undef CRYPTO2
 #undef CRYPTO3
@@ -1514,7 +1514,9 @@ arm_init_iwmmxt_builtins (void)
      machine_mode mode;
      tree type;

-      if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
+      if (d->name == 0 ||
+         !(ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT) ||
+           ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT2)))
        continue;

      mode = insn_data[d->icode].operand[1].mode;
@@ -1538,17 +1540,17 @@ arm_init_iwmmxt_builtins (void)
          gcc_unreachable ();
        }

-      def_mbuiltin (d->mask, d->name, type, d->code);
+      def_mbuiltin (d->features, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
 #define iwmmx_mbuiltin(NAME, TYPE, CODE)                       \
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),      \
-               ARM_BUILTIN_ ## CODE)
+  def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT), "__builtin_arm_" NAME, \
+               (TYPE), ARM_BUILTIN_ ## CODE)

 #define iwmmx2_mbuiltin(NAME, TYPE, CODE)                      \
-  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),     \
-               ARM_BUILTIN_ ## CODE)
+  def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), "__builtin_arm_" NAME, \
+               (TYPE), ARM_BUILTIN_ ## CODE)

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
@@ -1702,10 +1704,12 @@ arm_init_iwmmxt_builtins (void)
 static void
 arm_init_fp16_builtins (void)
 {
-  tree fp16_type = make_node (REAL_TYPE);
-  TYPE_PRECISION (fp16_type) = 16;
-  layout_type (fp16_type);
-  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+  arm_simd_floatHF_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+  layout_type (arm_simd_floatHF_type_node);
+  if (arm_fp16_format)
+    (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
+                                              "__fp16");
 }

 static void
@@ -1750,12 +1754,13 @@ arm_init_builtins (void)
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

+  /* This creates the arm_simd_floatHF_type_node so must come before
+     arm_init_neon_builtins which uses it.  */
+  arm_init_fp16_builtins ();
+
  if (TARGET_NEON)
    arm_init_neon_builtins ();

-  if (arm_fp16_format)
-    arm_init_fp16_builtins ();
-
  if (TARGET_CRC32)
    arm_init_crc32_builtins ();

diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 9d47fcfbcd7..4c35200b3f8 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -33,7 +33,7 @@
   The TUNE_IDENT is the name of the core for which scheduling decisions
   should be made, represented as an identifier.
   ARCH is the architecture revision implemented by the chip.
-  FLAGS are the bitwise-or of the traits that apply to that core.
+  FLAGS is the set of feature flags of that core.
   This need not include flags implied by the architecture.
   COSTS is the name of the rtx_costs routine to use.
@@ -43,134 +43,134 @@ Some tools assume no whitespace up to the first "," in each entry. */ /* V2/V2A Architecture Processors */ -ARM_CORE("arm2", arm2, arm2, 2, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm250", arm250, arm250, 2, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm3", arm3, arm3, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm2", arm2, arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul) +ARM_CORE("arm250", arm250, arm250, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul) +ARM_CORE("arm3", arm3, arm3, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul) /* V3 Architecture Processors */ -ARM_CORE("arm6", arm6, arm6, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm60", arm60, arm60, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm600", arm600, arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm610", arm610, arm610, 3, FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm620", arm620, arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm7", arm7, arm7, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm7d", arm7d, arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm7di", arm7di, arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm70", arm70, arm70, 3, FL_CO_PROC | FL_MODE26, slowmul) -ARM_CORE("arm700", arm700, arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm700i", arm700i, arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm710", arm710, arm710, 3, FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm720", arm720, arm720, 3, FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm710c", arm710c, arm710c, 3, FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm7100", arm7100, arm7100, 3, FL_MODE26 | FL_WBUF, slowmul) -ARM_CORE("arm7500", arm7500, arm7500, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm6", arm6, arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm60", arm60, arm60, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm600", arm600, arm600, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm610", arm610, arm610, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm620", arm620, arm620, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm7", arm7, arm7, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm7d", arm7d, arm7d, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm7di", arm7di, arm7di, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm70", arm70, arm70, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm700", arm700, arm700, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm700i", arm700i, arm700i, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm710", arm710, arm710, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm720", arm720, arm720, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm710c", arm710c, arm710c, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm7100", arm7100, arm7100, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) +ARM_CORE("arm7500", arm7500, arm7500, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | 
FL_WBUF | FL_FOR_ARCH3), slowmul) /* Doesn't have an external co-proc, but does have embedded fpa. */ -ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul) /* V3M Architecture Processors */ /* arm7m doesn't exist on its own, but only with D, ("and", and I), but those don't alter the code, so arm7m is sometimes used. */ -ARM_CORE("arm7m", arm7m, arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul) -ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul) -ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7m", arm7m, arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul) +ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul) +ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul) /* V4 Architecture Processors */ -ARM_CORE("arm8", arm8, arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) -ARM_CORE("arm810", arm810, arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) -ARM_CORE("strongarm", strongarm, strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -ARM_CORE("strongarm110", strongarm110, strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) -ARM_CORE("fa526", fa526, fa526, 4, FL_LDSCHED, fastmul) -ARM_CORE("fa626", fa626, fa626, 4, FL_LDSCHED, fastmul) +ARM_CORE("arm8", arm8, arm8, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul) +ARM_CORE("arm810", arm810, arm810, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul) +ARM_CORE("strongarm", strongarm, strongarm, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm) +ARM_CORE("strongarm110", strongarm110, strongarm110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm) +ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm) +ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm) +ARM_CORE("fa526", fa526, fa526, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul) +ARM_CORE("fa626", fa626, fa626, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul) /* V4T Architecture Processors */ -ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, FL_CO_PROC, fastmul) -ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, FL_CO_PROC, fastmul) -ARM_CORE("arm710t", arm710t, arm710t, 4T, FL_WBUF, fastmul) -ARM_CORE("arm720t", arm720t, arm720t, 4T, FL_WBUF, fastmul) -ARM_CORE("arm740t", arm740t, arm740t, 4T, FL_WBUF, fastmul) -ARM_CORE("arm9", arm9, arm9, 4T, FL_LDSCHED, fastmul) -ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, FL_LDSCHED, fastmul) -ARM_CORE("arm920", arm920, arm920, 4T, FL_LDSCHED, fastmul) -ARM_CORE("arm920t", arm920t, arm920t, 4T, FL_LDSCHED, fastmul) -ARM_CORE("arm922t", arm922t, arm922t, 4T, FL_LDSCHED, fastmul) -ARM_CORE("arm940t", arm940t, arm940t, 4T, FL_LDSCHED, fastmul) -ARM_CORE("ep9312", ep9312, ep9312, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | 
FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm710t", arm710t, arm710t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm720t", arm720t, arm720t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm740t", arm740t, arm740t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm9", arm9, arm9, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm920", arm920, arm920, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm920t", arm920t, arm920t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm922t", arm922t, arm922t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("arm940t", arm940t, arm940t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) +ARM_CORE("ep9312", ep9312, ep9312, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul) /* V5T Architecture Processors */ -ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, FL_LDSCHED, fastmul) -ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, FL_LDSCHED, fastmul) +ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul) +ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul) /* V5TE Architecture Processors */ -ARM_CORE("arm9e", arm9e, arm9e, 5TE, FL_LDSCHED, 9e) -ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, FL_LDSCHED, 9e) -ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, FL_LDSCHED, 9e) -ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, FL_LDSCHED, 9e) -ARM_CORE("arm10e", arm10e, arm10e, 5TE, FL_LDSCHED, fastmul) -ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, FL_LDSCHED, fastmul) -ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, FL_LDSCHED, fastmul) -ARM_CORE("xscale", xscale, xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) -ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) -ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale) -ARM_CORE("fa606te", fa606te, fa606te, 5TE, FL_LDSCHED, 9e) -ARM_CORE("fa626te", fa626te, fa626te, 5TE, FL_LDSCHED, 9e) -ARM_CORE("fmp626", fmp626, fmp626, 5TE, FL_LDSCHED, 9e) -ARM_CORE("fa726te", fa726te, fa726te, 5TE, FL_LDSCHED, fa726te) +ARM_CORE("arm9e", arm9e, arm9e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("arm10e", arm10e, arm10e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul) +ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul) +ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul) +ARM_CORE("xscale", xscale, xscale, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_FOR_ARCH5TE), xscale) +ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_FOR_ARCH5TE), xscale) +ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 
(FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2 | FL_FOR_ARCH5TE), xscale) +ARM_CORE("fa606te", fa606te, fa606te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("fa626te", fa626te, fa626te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("fmp626", fmp626, fmp626, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e) +ARM_CORE("fa726te", fa726te, fa726te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fa726te) /* V5TEJ Architecture Processors */ -ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, FL_LDSCHED, 9e) -ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e) +ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e) +ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e) /* V6 Architecture Processors */ -ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e) -ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) -ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, FL_LDSCHED, 9e) -ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, FL_LDSCHED | FL_VFPV2, 9e) -ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e) -ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) -ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2) -ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) +ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6J), 9e) +ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6J), 9e) +ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6KZ), 9e) +ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6KZ), 9e) +ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6K), 9e) +ARM_CORE("mpcore", mpcore, mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6K), 9e) +ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6T2), v6t2) +ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6T2), v6t2) /* V6M Architecture Processors */ -ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, FL_LDSCHED, v6m) -ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, FL_LDSCHED, v6m) -ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m) +ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m) +ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m) /* V6M Architecture Processors for small-multiply implementations. 
*/ -ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, FL_LDSCHED | FL_SMALLMUL, v6m) -ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, FL_LDSCHED | FL_SMALLMUL, v6m) -ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, FL_LDSCHED | FL_SMALLMUL, v6m) +ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m) +ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m) +ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m) /* V7 Architecture Processors */ -ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) -ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) -ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) -ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) -ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) -ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) -ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) -ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) -ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, FL_LDSCHED, cortex) -ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex) -ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7) -ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) -ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) -ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4) +ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex) +ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a5) +ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a7) +ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a8) +ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a9) +ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12) +ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15) +ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12) +ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex) +ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex) +ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex) +ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex) +ARM_CORE("cortex-m7", 
cortexm7, cortexm7, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_NO_VOLATILE_CE | FL_FOR_ARCH7EM), cortex_m7) +ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7EM), v7m) +ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7M), v7m) +ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), marvell_pj4) /* V7 big.LITTLE implementations */ -ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) -ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) +ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15) +ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12) /* V8 Architecture Processors */ -ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) -ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) -ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) -ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) -ARM_CORE("xgene1", xgene1, xgene1, 8A, FL_LDSCHED, xgene1) +ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53) +ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) +ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) +ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) +ARM_CORE("xgene1", xgene1, xgene1, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8A), xgene1) /* V8 big.LITTLE implementations */ -ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) -ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) +ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) +ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index cef9eec277d..8df312f3c67 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -319,6 +319,7 @@ extern int vfp3_const_double_for_bits (rtx); extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); #endif /* RTX_CODE */ @@ -346,6 +347,8 @@ extern bool arm_is_constant_pool_ref (rtx); /* Flags used to identify the presence of processor capabilities. */ /* Bit values used to identify processor capabilities. */ +#define FL_NONE (0) /* No flags. */ +#define FL_ANY (0xffffffff) /* All flags. 
*/ #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ #define FL_ARCH3M (1 << 1) /* Extended multiply */ #define FL_MODE26 (1 << 2) /* 26-bit mode support */ @@ -413,13 +416,116 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) +/* There are too many feature bits to fit in a single word so the set of cpu and + fpu capabilities is a structure. A feature set is created and manipulated + with the ARM_FSET macros. */ + +typedef struct +{ + unsigned long cpu[2]; +} arm_feature_set; + + +/* Initialize a feature set. */ + +#define ARM_FSET_MAKE(CPU1,CPU2) { { (CPU1), (CPU2) } } + +#define ARM_FSET_MAKE_CPU1(CPU1) ARM_FSET_MAKE ((CPU1), (FL_NONE)) +#define ARM_FSET_MAKE_CPU2(CPU2) ARM_FSET_MAKE ((FL_NONE), (CPU2)) + +/* Accessors. */ + +#define ARM_FSET_CPU1(S) ((S).cpu[0]) +#define ARM_FSET_CPU2(S) ((S).cpu[1]) + +/* Useful combinations. */ + +#define ARM_FSET_EMPTY ARM_FSET_MAKE (FL_NONE, FL_NONE) +#define ARM_FSET_ANY ARM_FSET_MAKE (FL_ANY, FL_ANY) + +/* Tests for a specific CPU feature. */ + +#define ARM_FSET_HAS_CPU1(A, F) \ + (((A).cpu[0] & ((unsigned long)(F))) == ((unsigned long)(F))) +#define ARM_FSET_HAS_CPU2(A, F) \ + (((A).cpu[1] & ((unsigned long)(F))) == ((unsigned long)(F))) +#define ARM_FSET_HAS_CPU(A, F1, F2) \ + (ARM_FSET_HAS_CPU1 ((A), (F1)) && ARM_FSET_HAS_CPU2 ((A), (F2))) + +/* Add a feature to a feature set. */ + +#define ARM_FSET_ADD_CPU1(DST, F) \ + do { \ + (DST).cpu[0] |= (F); \ + } while (0) + +#define ARM_FSET_ADD_CPU2(DST, F) \ + do { \ + (DST).cpu[1] |= (F); \ + } while (0) + +/* Remove a feature from a feature set. */ + +#define ARM_FSET_DEL_CPU1(DST, F) \ + do { \ + (DST).cpu[0] &= ~(F); \ + } while (0) + +#define ARM_FSET_DEL_CPU2(DST, F) \ + do { \ + (DST).cpu[1] &= ~(F); \ + } while (0) + +/* Union of feature sets. */ + +#define ARM_FSET_UNION(DST,F1,F2) \ + do { \ + (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \ + (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; \ + } while (0) + +/* Intersection of feature sets. */ + +#define ARM_FSET_INTER(DST,F1,F2) \ + do { \ + (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \ + (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; \ + } while (0) + +/* Exclusive disjunction. */ + +#define ARM_FSET_XOR(DST,F1,F2) \ + do { \ + (DST).cpu[0] = (F1).cpu[0] ^ (F2).cpu[0]; \ + (DST).cpu[1] = (F1).cpu[1] ^ (F2).cpu[1]; \ + } while (0) + +/* Difference of feature sets: F1 excluding the elements of F2. */ + +#define ARM_FSET_EXCLUDE(DST,F1,F2) \ + do { \ + (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \ + (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; \ + } while (0) + +/* Test for an empty feature set. */ + +#define ARM_FSET_IS_EMPTY(A) \ + (!((A).cpu[0]) && !((A).cpu[1])) + +/* Tests whether the cpu features of A are a subset of B. */ + +#define ARM_FSET_CPU_SUBSET(A,B) \ + ((((A).cpu[0] & (B).cpu[0]) == (A).cpu[0]) \ + && (((A).cpu[1] & (B).cpu[1]) == (A).cpu[1])) + /* The bits in this mask specify which instructions we are allowed to generate. */ -extern unsigned long insn_flags; +extern arm_feature_set insn_flags; /* The bits in this mask specify which instruction scheduling options should be used. */ -extern unsigned long tune_flags; +extern arm_feature_set tune_flags; /* Nonzero if this chip supports the ARM Architecture 3M extensions. 
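With the capabilities split across two words, no single bitwise expression can operate on a whole set, which is why each operation above is a statement macro applying the same operator to both words. A quick self-contained check of the union/intersection/difference identities, with placeholder bit values:

    #include <assert.h>

    typedef struct { unsigned long cpu[2]; } arm_feature_set;

    #define ARM_FSET_UNION(DST, F1, F2) \
      do { (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \
           (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; } while (0)
    #define ARM_FSET_INTER(DST, F1, F2) \
      do { (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \
           (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; } while (0)
    #define ARM_FSET_EXCLUDE(DST, F1, F2) \
      do { (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \
           (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; } while (0)
    #define ARM_FSET_IS_EMPTY(A) (!((A).cpu[0]) && !((A).cpu[1]))

    int
    main (void)
    {
      arm_feature_set a = { { 0x3, 0x1 } };   /* bits 0,1 / bit 0 */
      arm_feature_set b = { { 0x1, 0x0 } };   /* bit 0   / none  */
      arm_feature_set u, i, d;

      ARM_FSET_UNION (u, a, b);
      ARM_FSET_INTER (i, a, b);
      ARM_FSET_EXCLUDE (d, b, a);             /* b minus a */

      assert (u.cpu[0] == 0x3 && u.cpu[1] == 0x1);
      assert (i.cpu[0] == 0x1 && i.cpu[1] == 0x0);
      assert (ARM_FSET_IS_EMPTY (d));         /* b is a subset of a */
      return 0;
    }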
*/ extern int arm_arch3m; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 57702cbc8de..fa4e083adfe 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -97,6 +97,7 @@ static void arm_add_gc_roots (void); static int arm_gen_constant (enum rtx_code, machine_mode, rtx, HOST_WIDE_INT, rtx, rtx, int, int); static unsigned bit_count (unsigned long); +static unsigned feature_count (const arm_feature_set*); static int arm_address_register_rtx_p (rtx, int); static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int); static bool is_called_in_ARM_mode (tree); @@ -767,11 +768,11 @@ static int thumb_call_reg_needed; /* The bits in this mask specify which instructions we are allowed to generate. */ -unsigned long insn_flags = 0; +arm_feature_set insn_flags = ARM_FSET_EMPTY; /* The bits in this mask specify which instruction scheduling options should be used. */ -unsigned long tune_flags = 0; +arm_feature_set tune_flags = ARM_FSET_EMPTY; /* The highest ARM architecture version supported by the target. */ @@ -927,7 +928,7 @@ struct processors enum processor_type core; const char *arch; enum base_architecture base_arch; - const unsigned long flags; + const arm_feature_set flags; const struct tune_params *const tune; }; @@ -2196,10 +2197,10 @@ static const struct processors all_cores[] = /* ARM Cores */ #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \ - FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, + FLAGS, &arm_##COSTS##_tune}, #include "arm-cores.def" #undef ARM_CORE - {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} + {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL} }; static const struct processors all_architectures[] = @@ -2212,7 +2213,7 @@ static const struct processors all_architectures[] = {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL}, #include "arm-arches.def" #undef ARM_ARCH - {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} + {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL} }; @@ -2278,6 +2279,14 @@ bit_count (unsigned long value) return count; } +/* Return the number of features in feature-set SET. */ +static unsigned +feature_count (const arm_feature_set * set) +{ + return (bit_count (ARM_FSET_CPU1 (*set)) + + bit_count (ARM_FSET_CPU2 (*set))); +} + typedef struct { machine_mode mode; @@ -2703,7 +2712,7 @@ arm_option_check_internal (struct gcc_options *opts) /* Make sure that the processor choice does not conflict with any of the other command line choices. */ - if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM)) + if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM)) error ("target CPU does not support ARM mode"); /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done @@ -2803,7 +2812,8 @@ static void arm_option_override_internal (struct gcc_options *opts, struct gcc_options *opts_set) { - if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB)) + if (TARGET_THUMB_P (opts->x_target_flags) + && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))) { warning (0, "target CPU does not support THUMB instructions"); opts->x_target_flags &= ~MASK_THUMB; @@ -2890,8 +2900,13 @@ arm_option_override (void) { if (arm_selected_cpu) { + const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE); + arm_feature_set selected_flags; + ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags, + arm_selected_arch->flags); + ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags); /* Check for conflict between mcpu and march. 
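The conflict check at this point is the two-word equivalent of the old expression (cpu->flags ^ arch->flags) & ~FL_TUNE: XOR the two sets, strip the tuning-only bits, and report a conflict if anything remains. Condensed into one hypothetical helper (the names and bit values are stand-ins, not GCC's):

    #include <assert.h>

    typedef struct { unsigned long cpu[2]; } arm_feature_set;

    /* True if CPU and ARCH disagree on any non-tuning feature,
       i.e. the ARM_FSET_XOR/ARM_FSET_EXCLUDE/ARM_FSET_IS_EMPTY
       sequence used in arm_option_override.  */
    static int
    cpu_arch_conflict_p (arm_feature_set cpu, arm_feature_set arch,
                         arm_feature_set tune_only)
    {
      unsigned long d0 = (cpu.cpu[0] ^ arch.cpu[0]) & ~tune_only.cpu[0];
      unsigned long d1 = (cpu.cpu[1] ^ arch.cpu[1]) & ~tune_only.cpu[1];
      return d0 != 0 || d1 != 0;
    }

    int
    main (void)
    {
      arm_feature_set cpu  = { { 0x7, 0 } };  /* features plus a tune bit */
      arm_feature_set arch = { { 0x3, 0 } };
      arm_feature_set tune = { { 0x4, 0 } };  /* stand-in for FL_TUNE */

      assert (!cpu_arch_conflict_p (cpu, arch, tune)); /* tuning-only diff */
      arch.cpu[0] = 0x1;
      assert (cpu_arch_conflict_p (cpu, arch, tune));  /* real mismatch */
      return 0;
    }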
*/ - if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) + if (!ARM_FSET_IS_EMPTY (selected_flags)) { warning (0, "switch -mcpu=%s conflicts with -march=%s switch", arm_selected_cpu->name, arm_selected_arch->name); @@ -2915,7 +2930,7 @@ arm_option_override (void) if (!arm_selected_cpu) { const struct processors * sel; - unsigned int sought; + arm_feature_set sought = ARM_FSET_EMPTY;; arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; if (!arm_selected_cpu->name) @@ -2935,26 +2950,27 @@ arm_option_override (void) /* Now check to see if the user has specified some command line switch that require certain abilities from the cpu. */ - sought = 0; if (TARGET_INTERWORK || TARGET_THUMB) { - sought |= (FL_THUMB | FL_MODE32); + ARM_FSET_ADD_CPU1 (sought, FL_THUMB); + ARM_FSET_ADD_CPU1 (sought, FL_MODE32); /* There are no ARM processors that support both APCS-26 and interworking. Therefore we force FL_MODE26 to be removed from insn_flags here (if it was set), so that the search below will always be able to find a compatible processor. */ - insn_flags &= ~FL_MODE26; + ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26); } - if (sought != 0 && ((sought & insn_flags) != sought)) + if (!ARM_FSET_IS_EMPTY (sought) + && !(ARM_FSET_CPU_SUBSET (sought, insn_flags))) { /* Try to locate a CPU type that supports all of the abilities of the default CPU, plus the extra abilities requested by the user. */ for (sel = all_cores; sel->name != NULL; sel++) - if ((sel->flags & sought) == (sought | insn_flags)) + if (ARM_FSET_CPU_SUBSET (sought, sel->flags)) break; if (sel->name == NULL) @@ -2974,19 +2990,23 @@ arm_option_override (void) command line options we scan the array again looking for a best match. */ for (sel = all_cores; sel->name != NULL; sel++) - if ((sel->flags & sought) == sought) - { - unsigned count; - - count = bit_count (sel->flags & insn_flags); - - if (count >= current_bit_count) - { - best_fit = sel; - current_bit_count = count; - } - } + { + arm_feature_set required = ARM_FSET_EMPTY; + ARM_FSET_UNION (required, sought, insn_flags); + if (ARM_FSET_CPU_SUBSET (required, sel->flags)) + { + unsigned count; + arm_feature_set flags; + ARM_FSET_INTER (flags, sel->flags, insn_flags); + count = feature_count (&flags); + if (count >= current_bit_count) + { + best_fit = sel; + current_bit_count = count; + } + } + } gcc_assert (best_fit); sel = best_fit; } @@ -3014,7 +3034,8 @@ arm_option_override (void) /* BPABI targets use linker tricks to allow interworking on cores without thumb support. */ - if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) + if (TARGET_INTERWORK + && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI)) { warning (0, "target CPU does not support interworking" ); target_flags &= ~MASK_INTERWORK; @@ -3039,34 +3060,34 @@ arm_option_override (void) warning (0, "passing floating point arguments in fp regs not yet supported"); /* Initialize boolean versions of the flags, for use in the arm.md file. 
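In the rewritten best-fit loop above, feature_count takes over the role of bit_count: each candidate CPU whose flags cover the required features is scored by how many features it shares with insn_flags, and the highest score wins. A sketch of that scoring, assuming GCC's __builtin_popcountl and placeholder bit values:

    typedef struct { unsigned long cpu[2]; } arm_feature_set;

    /* Mirrors arm.c's feature_count: bit_count applied to each word.
       __builtin_popcountl is a GCC built-in.  */
    static unsigned
    feature_count (const arm_feature_set *s)
    {
      return __builtin_popcountl (s->cpu[0])
             + __builtin_popcountl (s->cpu[1]);
    }

    int
    main (void)
    {
      arm_feature_set insn = { { 0x1f, 0x0 } };  /* wanted features */
      arm_feature_set cand = { { 0x3b, 0x0 } };  /* one candidate CPU */
      arm_feature_set common = { { insn.cpu[0] & cand.cpu[0],
                                   insn.cpu[1] & cand.cpu[1] } };

      /* The loop keeps the candidate with the highest such score;
         here 0x1f & 0x3b = 0x1b, four shared features.  */
      return feature_count (&common) == 4 ? 0 : 1;
    }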
*/ - arm_arch3m = (insn_flags & FL_ARCH3M) != 0; - arm_arch4 = (insn_flags & FL_ARCH4) != 0; - arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); - arm_arch5 = (insn_flags & FL_ARCH5) != 0; - arm_arch5e = (insn_flags & FL_ARCH5E) != 0; - arm_arch6 = (insn_flags & FL_ARCH6) != 0; - arm_arch6k = (insn_flags & FL_ARCH6K) != 0; - arm_arch6kz = arm_arch6k && (insn_flags & FL_ARCH6KZ); - arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M); + arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4); + arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)); + arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5); + arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E); + arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6); + arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K); + arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ); + arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM); arm_arch6m = arm_arch6 && !arm_arch_notm; - arm_arch7 = (insn_flags & FL_ARCH7) != 0; - arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; - arm_arch8 = (insn_flags & FL_ARCH8) != 0; - arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; - arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; - - arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; - arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; - arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; - arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; - arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; - arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0; - arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; - arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; - arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0; + arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7); + arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM); + arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8); + arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2); + arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE); + + arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED); + arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG); + arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF); + arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE); + arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT); + arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2); + arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV); + arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV); + arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE); arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; - arm_arch_crc = (insn_flags & FL_CRC32) != 0; - arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0; + arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32); + arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL); /* V5 code we generate is completely interworking capable, so we turn off TARGET_INTERWORK here to avoid many tests later on. */ @@ -3158,7 +3179,7 @@ arm_option_override (void) /* For arm2/3 there is no need to do any scheduling if we are doing software floating-point. */ - if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0) + if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32)) flag_schedule_insns = flag_schedule_insns_after_reload = 0; /* Use the cp15 method if it is available. 
*/ @@ -12607,22 +12628,12 @@ neon_vdup_constant (rtx vals) { machine_mode mode = GET_MODE (vals); machine_mode inner_mode = GET_MODE_INNER (mode); - int n_elts = GET_MODE_NUNITS (mode); - bool all_same = true; rtx x; - int i; if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) return NULL_RTX; - for (i = 0; i < n_elts; ++i) - { - x = XVECEXP (vals, 0, i); - if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) - all_same = false; - } - - if (!all_same) + if (!const_vec_duplicate_p (vals, &x)) /* The elements are not all the same. We could handle repeating patterns of a mode larger than INNER_MODE here (e.g. int8x8_t {0, C, 0, C, 0, C, 0, C} which can be loaded using @@ -12633,7 +12644,7 @@ neon_vdup_constant (rtx vals) single ARM register. This will be cheaper than a vector load. */ - x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + x = copy_to_mode_reg (inner_mode, x); return gen_rtx_VEC_DUPLICATE (mode, x); } @@ -12809,10 +12820,10 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, if (lane < low || lane >= high) { if (exp) - error ("%K%s %lld out of range %lld - %lld", + error ("%K%s %wd out of range %wd - %wd", exp, desc, lane, low, high - 1); else - error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1); + error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1); } } @@ -28674,6 +28685,38 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in, #undef BRANCH } +/* Returns true if the pattern is a valid symbolic address, which is either a + symbol_ref or (symbol_ref + addend). + + According to the ARM ELF ABI, the initial addend of REL-type relocations + processing MOVW and MOVT instructions is formed by interpreting the 16-bit + literal field of the instruction as a 16-bit signed value in the range + -32768 <= A < 32768. */ + +bool +arm_valid_symbolic_address_p (rtx addr) +{ + rtx xop0, xop1 = NULL_RTX; + rtx tmp = addr; + + if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF) + return true; + + /* (const (plus: symbol_ref const_int)) */ + if (GET_CODE (addr) == CONST) + tmp = XEXP (addr, 0); + + if (GET_CODE (tmp) == PLUS) + { + xop0 = XEXP (tmp, 0); + xop1 = XEXP (tmp, 1); + + if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1)) + return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff); + } + + return false; +} /* Returns true if a valid comparison operation and makes the operands in a form that is valid. 
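arm_valid_symbolic_address_p above accepts a bare symbol_ref or label_ref, or a (const (plus symbol_ref const_int)) whose addend fits the signed 16-bit REL addend field its comment describes. The range test in isolation, as a toy model rather than the RTL walk:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* The ARM ELF ABI REL addend constraint for MOVW/MOVT:
       -32768 <= A < 32768, i.e. IN_RANGE (A, -0x8000, 0x7fff).  */
    static bool
    movw_movt_addend_ok (int64_t addend)
    {
      return addend >= -0x8000 && addend <= 0x7fff;
    }

    int
    main (void)
    {
      assert (movw_movt_addend_ok (0));
      assert (movw_movt_addend_ok (-32768));
      assert (!movw_movt_addend_ok (32768));  /* one past the top */
      return 0;
    }

This predicate is what the tightened "movt" pattern and the "j" constraint in constraints.md (below) use to reject symbol+offset forms that the relocation cannot encode.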
*/ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 288bbb9f836..b6c20478f9c 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -5774,7 +5774,7 @@ [(set (match_operand:SI 0 "nonimmediate_operand" "=r") (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") (match_operand:SI 2 "general_operand" "i")))] - "arm_arch_thumb2" + "arm_arch_thumb2 && arm_valid_symbolic_address_p (operands[2])" "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") @@ -8162,8 +8162,7 @@ size += GET_MODE_SIZE (GET_MODE (src)); } - emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL, - const0_rtx)); + emit_call_insn (gen_call_value (par, operands[0], const0_rtx, NULL)); size = 0; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c923e294cda..2b30be61a46 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -41,7 +41,9 @@ typedef __simd64_int8_t int8x8_t; typedef __simd64_int16_t int16x4_t; typedef __simd64_int32_t int32x2_t; typedef __builtin_neon_di int64x1_t; +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) typedef __simd64_float16_t float16x4_t; +#endif typedef __simd64_float32_t float32x2_t; typedef __simd64_poly8_t poly8x8_t; typedef __simd64_poly16_t poly16x4_t; @@ -6220,21 +6222,25 @@ vcvtq_u32_f32 (float32x4_t __a) } #if ((__ARM_FP & 0x2) != 0) +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) vcvt_f16_f32 (float32x4_t __a) { return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); } - #endif +#endif + #if ((__ARM_FP & 0x2) != 0) +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vcvt_f32_f16 (float16x4_t __a) { return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); } - #endif +#endif + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vcvt_n_s32_f32 (float32x2_t __a, const int __b) { diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 42935a4ca6d..e24858fe45e 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -21,7 +21,7 @@ ;; The following register constraints have been used: ;; - in ARM/Thumb-2 state: t, w, x, y, z ;; - in Thumb state: h, b -;; - in both states: l, c, k, q, US +;; - in both states: l, c, k, q, Cs, Ts, US ;; In ARM state, 'l' is an alias for 'r' ;; 'f' and 'v' were previously used for FPA and MAVERICK registers. @@ -67,7 +67,8 @@ (define_constraint "j" "A constant suitable for a MOVW instruction. 
(ARM/Thumb-2)" (and (match_test "TARGET_32BIT && arm_arch_thumb2") - (ior (match_code "high") + (ior (and (match_code "high") + (match_test "arm_valid_symbolic_address_p (XEXP (op, 0))")) (and (match_code "const_int") (match_test "(ival & 0xffff0000) == 0"))))) diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 3c477bc8eee..bec9a8bb788 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -9255,10 +9255,10 @@ avr_pgm_check_var_decl (tree node) { if (TYPE_P (node)) error ("%qT uses address space %qs beyond flash of %d KiB", - node, avr_addrspace[as].name, avr_n_flash); + node, avr_addrspace[as].name, 64 * avr_n_flash); else error ("%s %q+D uses address space %qs beyond flash of %d KiB", - reason, node, avr_addrspace[as].name, avr_n_flash); + reason, node, avr_addrspace[as].name, 64 * avr_n_flash); } else { @@ -9305,7 +9305,7 @@ avr_insert_attributes (tree node, tree *attributes) if (avr_addrspace[as].segment >= avr_n_flash) { error ("variable %q+D located in address space %qs beyond flash " - "of %d KiB", node, avr_addrspace[as].name, avr_n_flash); + "of %d KiB", node, avr_addrspace[as].name, 64 * avr_n_flash); } else if (!AVR_HAVE_LPM && avr_addrspace[as].pointer_size > 2) { diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c index 8185b59b282..7b3b6efd3db 100644 --- a/gcc/config/cr16/cr16.c +++ b/gcc/config/cr16/cr16.c @@ -583,7 +583,7 @@ cr16_function_arg (cumulative_args_t cum_v, machine_mode mode, /* function_arg () is called with this type just after all the args have had their registers assigned. The rtx that function_arg returns from this type is supposed to pass to 'gen_call' but currently it is not - implemented (see macro GEN_CALL). */ + implemented. */ if (type == void_type_node) return NULL_RTX; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 05fa5e10ebf..c69c738caa0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -436,7 +436,7 @@ struct processor_costs iamcu_cost = { COSTS_N_INSNS (3), /* cost of movsx */ COSTS_N_INSNS (2), /* cost of movzx */ 8, /* "large" insn */ - 6, /* MOVE_RATIO */ + 9, /* MOVE_RATIO */ 6, /* cost for loading QImode using movzbl */ {2, 4, 2}, /* cost of loading integer registers in QImode, HImode and SImode. @@ -25531,7 +25531,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) /* Avoid branch in fixing the byte. */ tmpreg = gen_lowpart (QImode, tmpreg); - emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); + emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); tmp = gen_rtx_REG (CCmode, FLAGS_REG); cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); @@ -39510,60 +39510,57 @@ rdseed_step: return target; case IX86_BUILTIN_SBB32: - icode = CODE_FOR_subsi3_carry; + icode = CODE_FOR_subborrowsi; mode0 = SImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_SBB64: - icode = CODE_FOR_subdi3_carry; + icode = CODE_FOR_subborrowdi; mode0 = DImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_ADDCARRYX32: - icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry; + icode = CODE_FOR_addcarrysi; mode0 = SImode; - goto addcarryx; + goto handlecarry; case IX86_BUILTIN_ADDCARRYX64: - icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry; + icode = CODE_FOR_addcarrydi; mode0 = DImode; -addcarryx: + handlecarry: arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. 
*/ arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ - op0 = gen_reg_rtx (QImode); - - /* Generate CF from input operand. */ op1 = expand_normal (arg0); op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); - emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx)); - /* Gen ADCX instruction to compute X+Y+CF. */ op2 = expand_normal (arg1); - op3 = expand_normal (arg2); - - if (!REG_P (op2)) + if (!register_operand (op2, mode0)) op2 = copy_to_mode_reg (mode0, op2); - if (!REG_P (op3)) - op3 = copy_to_mode_reg (mode0, op3); - - op0 = gen_reg_rtx (mode0); - op4 = gen_rtx_REG (CCCmode, FLAGS_REG); - pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx); - emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat)); + op3 = expand_normal (arg2); + if (!register_operand (op3, mode0)) + op3 = copy_to_mode_reg (mode0, op3); - /* Store the result. */ op4 = expand_normal (arg3); if (!address_operand (op4, VOIDmode)) { op4 = convert_memory_address (Pmode, op4); op4 = copy_addr_to_reg (op4); } - emit_move_insn (gen_rtx_MEM (mode0, op4), op0); + + /* Generate CF from input operand. */ + emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); + + /* Generate instruction that consumes CF. */ + op0 = gen_reg_rtx (mode0); + + op1 = gen_rtx_REG (CCCmode, FLAGS_REG); + pat = gen_rtx_LTU (mode0, op1, const0_rtx); + emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat)); /* Return current CF value. */ if (target == 0) @@ -39571,6 +39568,10 @@ addcarryx: PUT_MODE (pat, QImode); emit_insn (gen_rtx_SET (target, pat)); + + /* Store the result. */ + emit_move_insn (gen_rtx_MEM (mode0, op4), op0); + return target; case IX86_BUILTIN_READ_FLAGS: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e6c2d30e507..7017913afe2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -102,7 +102,6 @@ UNSPEC_SAHF UNSPEC_PARITY UNSPEC_FSTCW - UNSPEC_ADD_CARRY UNSPEC_FLDCW UNSPEC_REP UNSPEC_LD_MPIC ; load_macho_picbase @@ -848,8 +847,6 @@ (define_code_attr plusminus_mnemonic [(plus "add") (ss_plus "adds") (us_plus "addus") (minus "sub") (ss_minus "subs") (us_minus "subus")]) -(define_code_attr plusminus_carry_mnemonic - [(plus "adc") (minus "sbb")]) (define_code_attr multdiv_mnemonic [(mult "mul") (div "div")]) @@ -5317,46 +5314,21 @@ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" "#" "reload_completed" - [(parallel [(set (reg:CC FLAGS_REG) - (unspec:CC [(match_dup 1) (match_dup 2)] - UNSPEC_ADD_CARRY)) + [(parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:DWIH (match_dup 1) (match_dup 2)) + (match_dup 1))) (set (match_dup 0) (plus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (plus:DWIH - (match_dup 4) (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_dup 5)))) + (match_dup 4)) + (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") -(define_insn "*add<mode>3_cc" - [(set (reg:CC FLAGS_REG) - (unspec:CC - [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")] - UNSPEC_ADD_CARRY)) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") - (plus:SWI48 (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" - "add{<imodesuffix>}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") - (set_attr "mode" "<MODE>")]) - -(define_insn "addqi3_cc" - [(set (reg:CC FLAGS_REG) - (unspec:CC - [(match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qn,qm")] - 
UNSPEC_ADD_CARRY)) - (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") - (plus:QI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, QImode, operands)" - "add{b}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - (define_insn "*add<mode>_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r") (plus:SWI48 @@ -6264,10 +6236,10 @@ (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (minus:DWIH - (match_dup 4) - (plus:DWIH - (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (match_dup 5)))) + (minus:DWIH + (match_dup 4) + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) + (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") @@ -6431,29 +6403,17 @@ ;; Add with carry and subtract with borrow -(define_expand "<plusminus_insn><mode>3_carry" - [(parallel - [(set (match_operand:SWI 0 "nonimmediate_operand") - (plusminus:SWI - (match_operand:SWI 1 "nonimmediate_operand") - (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") - (const_int 0)]) - (match_operand:SWI 2 "<general_operand>")))) - (clobber (reg:CC FLAGS_REG))])] - "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)") - -(define_insn "*<plusminus_insn><mode>3_carry" +(define_insn "add<mode>3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") - (plusminus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0") + (plus:SWI (plus:SWI - (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))) + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" - "<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}" + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") @@ -6462,10 +6422,11 @@ (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (plus:SI (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 2 "x86_64_general_operand" "rme"))))) + (plus:SI + (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 1 "register_operand" "%0")) + (match_operand:SI 2 "x86_64_general_operand" "rme")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" @@ -6474,45 +6435,96 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) +;; There is no point to generate ADCX instruction. ADC is shorter and faster. 
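That note records the design choice: the addcarry<mode> and subborrow<mode> patterns defined next always emit ADC/SBB rather than ADCX, since ADC is shorter and no slower. From C these patterns are normally reached through the rewritten IX86_BUILTIN_ADDCARRYX expansion in i386.c above, typically via GCC's _addcarry_u32 intrinsic. A small multi-precision demo, assuming an x86 target:

    #include <stdio.h>
    #include <x86intrin.h>

    /* Chained 96-bit addition; each _addcarry_u32 maps onto the
       addcarrysi pattern, so the carry flows from one step to the
       next in the flags register.  */
    int
    main (void)
    {
      unsigned a[3] = { 0xffffffffu, 0xffffffffu, 0 };
      unsigned b[3] = { 1, 0, 0 };
      unsigned r[3];
      unsigned char c = 0;

      c = _addcarry_u32 (c, a[0], b[0], &r[0]);
      c = _addcarry_u32 (c, a[1], b[1], &r[1]);
      c = _addcarry_u32 (c, a[2], b[2], &r[2]);

      /* 0x00000000ffffffffffffffff + 1 = 0x000000010000000000000000 */
      printf ("%08x%08x%08x carry=%d\n", r[2], r[1], r[0], c);
      return 0;
    }

At -O2 such a chain is expected to compile to one add followed by two adc instructions, the carry never being materialized in a general register.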
+ +(define_insn "addcarry<mode>" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 1 "nonimmediate_operand" "%0")) + (match_operand:SWI48 2 "nonimmediate_operand" "rm")) + (match_dup 1))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (plus:SWI48 (plus:SWI48 (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "adc{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + +(define_insn "sub<mode>3_carry" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (minus:SWI + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*subsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0") - (plus:SI (match_operator 3 "ix86_carry_flag_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 2 "x86_64_general_operand" "rme"))))) + (minus:SI + (minus:SI + (match_operand:SI 1 "register_operand" "0") + (match_operator:SI 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)])) + (match_operand:SI 2 "x86_64_general_operand" "rme")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) - -;; ADCX instruction -(define_insn "adcx<mode>3" +(define_insn "subborrow<mode>" [(set (reg:CCC FLAGS_REG) (compare:CCC + (match_operand:SWI48 1 "nonimmediate_operand" "0") (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0") - (plus:SWI48 - (match_operator 4 "ix86_carry_flag_operator" - [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) - (const_int 0))) + (match_operator:SWI48 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 2 "nonimmediate_operand" "rm")))) (set (match_operand:SWI48 0 "register_operand" "=r") - (plus:SWI48 (match_dup 1) - (plus:SWI48 (match_op_dup 4 - [(match_dup 3) (const_int 0)]) - (match_dup 2))))] - "TARGET_ADX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" - "adcx\t{%2, %0|%0, %2}" + (minus:SWI48 (minus:SWI48 (match_dup 1) + (match_op_dup 4 + [(match_dup 3) (const_int 0)])) + (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sbb{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) ;; Overflow setting add instructions +(define_expand "addqi3_cconly_overflow" + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:QI + (match_operand:QI 0 "nonimmediate_operand") + (match_operand:QI 1 "general_operand")) + (match_dup 0))) + (clobber (match_scratch:QI 2))])] + "!(MEM_P 
(operands[0]) && MEM_P (operands[1]))") + (define_insn "*add<mode>3_cconly_overflow" [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -8842,9 +8854,9 @@ (set (match_dup 0) (neg:DWIH (match_dup 1)))]) (parallel [(set (match_dup 2) - (plus:DWIH (match_dup 3) - (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) - (const_int 0)))) + (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 2) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index a9c8623ada2..bc76a5b7cee 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -912,25 +912,9 @@ /* Return true if operand is a vector constant that is all ones. */ (define_predicate "vector_all_ones_operand" - (match_code "const_vector") -{ - int nunits = GET_MODE_NUNITS (mode); - - if (GET_CODE (op) == CONST_VECTOR - && CONST_VECTOR_NUNITS (op) == nunits) - { - int i; - for (i = 0; i < nunits; ++i) - { - rtx x = CONST_VECTOR_ELT (op, i); - if (x != constm1_rtx) - return false; - } - return true; - } - - return false; -}) + (and (match_code "const_vector") + (match_test "INTEGRAL_MODE_P (GET_MODE (op))") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) ; Return true when OP is operand acceptable for standard SSE move. (define_predicate "vector_move_operand" diff --git a/gcc/config/iq2000/iq2000.md b/gcc/config/iq2000/iq2000.md index e87cb6802e3..bba67600d96 100644 --- a/gcc/config/iq2000/iq2000.md +++ b/gcc/config/iq2000/iq2000.md @@ -1708,7 +1708,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md index 88d04066d16..02ad3455bd1 100644 --- a/gcc/config/m32c/blkmov.md +++ b/gcc/config/m32c/blkmov.md @@ -178,10 +178,10 @@ ;; 3 = alignment (define_expand "cmpstrsi" - [(match_operand:HI 0 "" "") - (match_operand 1 "ap_operand" "") - (match_operand 2 "ap_operand" "") - (match_operand 3 "" "") + [(match_operand:HI 0 "register_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "memory_operand" "") + (match_operand 3 "const_int_operand" "") ] "TARGET_A24" "if (m32c_expand_cmpstr(operands)) DONE; FAIL;" diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md index 7f4195a9f2f..463c8277b43 100644 --- a/gcc/config/m68k/m68k.md +++ b/gcc/config/m68k/m68k.md @@ -6908,7 +6908,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c index 40a659a8d7b..6e7745ad137 100644 --- a/gcc/config/microblaze/microblaze.c +++ b/gcc/config/microblaze/microblaze.c @@ -661,7 +661,7 @@ microblaze_classify_unspec (struct microblaze_address_info *info, rtx x) else if (XINT (x, 1) == UNSPEC_TLS) { info->type = ADDRESS_TLS; - info->tls_type = tls_reloc INTVAL(XVECEXP(x, 0, 1)); + info->tls_type = tls_reloc (INTVAL (XVECEXP (x, 0, 1))); } else { diff --git a/gcc/config/mips/mips-opts.h b/gcc/config/mips/mips-opts.h index 79882051595..3c2c6590e3d 100644 --- a/gcc/config/mips/mips-opts.h +++ b/gcc/config/mips/mips-opts.h @@ -47,4 +47,10 @@ enum mips_r10k_cache_barrier_setting { #define MIPS_ARCH_OPTION_FROM_ABI -1 #define MIPS_ARCH_OPTION_NATIVE -2 +/* Enumerates the 
setting of the -mcompact-branches= option. */ +enum mips_cb_setting { + MIPS_CB_NEVER, + MIPS_CB_OPTIMAL, + MIPS_CB_ALWAYS +}; #endif diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index d9ad9100f99..8a9ae0147ed 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -298,6 +298,9 @@ extern const char *mips_output_conditional_branch (rtx_insn *, rtx *, const char *, const char *); extern const char *mips_output_order_conditional_branch (rtx_insn *, rtx *, bool); +extern const char *mips_output_equal_conditional_branch (rtx_insn *, rtx *, + bool); +extern const char *mips_output_jump (rtx *, int, int, bool); extern const char *mips_output_sync (void); extern const char *mips_output_sync_loop (rtx_insn *, rtx *); extern unsigned int mips_sync_loop_insns (rtx_insn *, rtx *); diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 401d73bfeaa..0e0ecf232d9 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -176,7 +176,8 @@ along with GCC; see the file COPYING3. If not see /* Return the opcode to jump to register DEST. When the JR opcode is not available use JALR $0, DEST. */ #define MIPS_JR(DEST) \ - (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9)) + (TARGET_CB_ALWAYS ? ((0x1b << 27) | ((DEST) << 16)) \ + : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9))) /* Return the opcode for: @@ -5181,7 +5182,8 @@ mips_allocate_fcc (machine_mode mode) conditions are: - EQ or NE between two registers. - - any comparison between a register and zero. */ + - any comparison between a register and zero. + - if compact branches are available then any condition is valid. */ static void mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p) @@ -5203,6 +5205,44 @@ mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p) else *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); } + else if (!need_eq_ne_p && TARGET_CB_MAYBE) + { + bool swap = false; + switch (*code) + { + case LE: + swap = true; + *code = GE; + break; + case GT: + swap = true; + *code = LT; + break; + case LEU: + swap = true; + *code = GEU; + break; + case GTU: + swap = true; + *code = LTU; + break; + case GE: + case LT: + case GEU: + case LTU: + /* Do nothing. */ + break; + default: + gcc_unreachable (); + } + *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); + if (swap) + { + rtx tmp = *op1; + *op1 = *op0; + *op0 = tmp; + } + } else { /* The comparison needs a separate scc instruction. Store the @@ -7260,7 +7300,7 @@ mips16_build_call_stub (rtx retval, rtx *fn_ptr, rtx args_size, int fp_code) if (fp_ret_p) { /* Now call the non-MIPS16 function. */ - output_asm_insn (MIPS_CALL ("jal", &fn, 0, -1), &fn); + output_asm_insn (mips_output_jump (&fn, 0, -1, true), &fn); fprintf (asm_out_file, "\t.cfi_register 31,18\n"); /* Move the result from floating-point registers to @@ -7630,12 +7670,22 @@ mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) half-word alignment, it is usually better to move in half words. For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr. - Otherwise move word-sized chunks. */ - if (MEM_ALIGN (src) == BITS_PER_WORD / 2 - && MEM_ALIGN (dest) == BITS_PER_WORD / 2) - bits = BITS_PER_WORD / 2; + Otherwise move word-sized chunks. + + For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise + picking the minimum of alignment or BITS_PER_WORD gets us the + desired size for bits. 
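The strategy in the comment above reduces, when lwl/lwr are unavailable, to capping the chunk width at the weaker of the two alignments and at the word size. Restated as a tiny computation, with stand-in constants for the MIPS32 case:

    #include <stdio.h>

    #define BITS_PER_WORD 32
    #define MIN(A, B) ((A) < (B) ? (A) : (B))

    int
    main (void)
    {
      unsigned src_align = 16, dest_align = 32;  /* bits; half-word src */
      unsigned bits = MIN (BITS_PER_WORD, MIN (src_align, dest_align));
      printf ("move in %u-bit chunks\n", bits);  /* 16: lh/sh pairs */
      return 0;
    }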
*/ + + if (!ISA_HAS_LWL_LWR) + bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); else - bits = BITS_PER_WORD; + { + if (MEM_ALIGN (src) == BITS_PER_WORD / 2 + && MEM_ALIGN (dest) == BITS_PER_WORD / 2) + bits = BITS_PER_WORD / 2; + else + bits = BITS_PER_WORD; + } mode = mode_for_size (bits, MODE_INT, 0); delta = bits / BITS_PER_UNIT; @@ -7754,8 +7804,9 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, bool mips_expand_block_move (rtx dest, rtx src, rtx length) { - /* Disable entirely for R6 initially. */ - if (!ISA_HAS_LWL_LWR) + if (!ISA_HAS_LWL_LWR + && (MEM_ALIGN (src) < MIPS_MIN_MOVE_MEM_ALIGN + || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN)) return false; if (CONST_INT_P (length)) @@ -8367,7 +8418,7 @@ mips_pop_asm_switch (struct mips_asm_switch *asm_switch) '!' Print "s" to use the short version if the delay slot contains a 16-bit instruction. - See also mips_init_print_operand_pucnt. */ + See also mips_init_print_operand_punct. */ static void mips_print_operand_punctuation (FILE *file, int ch) @@ -8451,7 +8502,8 @@ mips_print_operand_punctuation (FILE *file, int ch) case ':': /* When final_sequence is 0, the delay slot will be a nop. We can - use the compact version for microMIPS. */ + use the compact version where available. The %: formatter will + only be present if a compact form of the branch is available. */ if (final_sequence == 0) putc ('c', file); break; @@ -8459,8 +8511,9 @@ mips_print_operand_punctuation (FILE *file, int ch) case '!': /* If the delay slot instruction is short, then use the compact version. */ - if (final_sequence == 0 - || get_attr_length (final_sequence->insn (1)) == 2) + if (TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED && mips_isa_rev <= 5 + && (final_sequence == 0 + || get_attr_length (final_sequence->insn (1)) == 2)) putc ('s', file); break; @@ -12958,6 +13011,7 @@ mips_adjust_insn_length (rtx_insn *insn, int length) break; case HAZARD_DELAY: + case HAZARD_FORBIDDEN_SLOT: length += NOP_INSN_LENGTH; break; @@ -12969,6 +13023,78 @@ mips_adjust_insn_length (rtx_insn *insn, int length) return length; } +/* Return the asm template for a call. OPERANDS are the operands, TARGET_OPNO + is the operand number of the target. SIZE_OPNO is the operand number of + the argument size operand that can optionally hold the call attributes. If + SIZE_OPNO is not -1 and the call is indirect, use the function symbol from + the call attributes to attach a R_MIPS_JALR relocation to the call. LINK_P + indicates whether the jump is a call and needs to set the link register. + + When generating GOT code without explicit relocation operators, all calls + should use assembly macros. Otherwise, all indirect calls should use "jr" + or "jalr"; we will arrange to restore $gp afterwards if necessary. Finally, + we can only generate direct calls for -mabicalls by temporarily switching + to non-PIC mode. + + For microMIPS jal(r), we try to generate jal(r)s when a 16-bit + instruction is in the delay slot of jal(r). + + Where compact branches are available, we try to use them if the delay slot + has a NOP (or equivalently delay slots were not enabled for the instruction + anyway). */ + +const char * +mips_output_jump (rtx *operands, int target_opno, int size_opno, bool link_p) +{ + static char buffer[300]; + char *s = buffer; + bool reg_p = REG_P (operands[target_opno]); + + const char *and_link = link_p ? "al" : ""; + const char *reg = reg_p ? "r" : ""; + const char *compact = ""; + const char *nop = "%/"; + const char *short_delay = link_p ? "%!" 
: ""; + const char *insn_name = TARGET_CB_NEVER || reg_p ? "j" : "b"; + + /* Compact branches can only be described when the ISA has support for them + as both the compact formatter '%:' and the delay slot NOP formatter '%/' + work as a mutually exclusive pair. I.e. a NOP is never required if a + compact form is available. */ + if (!final_sequence + && (TARGET_CB_MAYBE + || (ISA_HAS_JRC && !link_p && reg_p))) + { + compact = "c"; + nop = ""; + } + + if (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS) + sprintf (s, "%%*%s%s\t%%%d%%/", insn_name, and_link, target_opno); + else + { + if (!reg_p && TARGET_ABICALLS_PIC2) + s += sprintf (s, ".option\tpic0\n\t"); + + if (reg_p && mips_get_pic_call_symbol (operands, size_opno)) + { + s += sprintf (s, "%%*.reloc\t1f,R_MIPS_JALR,%%%d\n1:\t", size_opno); + /* Not sure why this shouldn't permit a short delay but it did not + allow it before so we still don't allow it. */ + short_delay = ""; + } + else + s += sprintf (s, "%%*"); + + s += sprintf (s, "%s%s%s%s%s\t%%%d%s", insn_name, and_link, reg, compact, short_delay, + target_opno, nop); + + if (!reg_p && TARGET_ABICALLS_PIC2) + s += sprintf (s, "\n\t.option\tpic2"); + } + return buffer; +} + /* Return the assembly code for INSN, which has the operands given by OPERANDS, and which branches to OPERANDS[0] if some condition is true. BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0] @@ -13022,12 +13148,25 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands, } /* Output the unconditional branch to TAKEN. */ - if (TARGET_ABSOLUTE_JUMPS) + if (TARGET_ABSOLUTE_JUMPS && TARGET_CB_MAYBE) + { + /* Add a hazard nop. */ + if (!final_sequence) + { + output_asm_insn ("nop\t\t# hazard nop", 0); + fprintf (asm_out_file, "\n"); + } + output_asm_insn (MIPS_ABSOLUTE_JUMP ("bc\t%0"), &taken); + } + else if (TARGET_ABSOLUTE_JUMPS) output_asm_insn (MIPS_ABSOLUTE_JUMP ("j\t%0%/"), &taken); else { mips_output_load_label (taken); - output_asm_insn ("jr\t%@%]%/", 0); + if (TARGET_CB_MAYBE) + output_asm_insn ("jrc\t%@%]", 0); + else + output_asm_insn ("jr\t%@%]%/", 0); } /* Now deal with its delay slot; see above. */ @@ -13041,7 +13180,7 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands, asm_out_file, optimize, 1, NULL); final_sequence->insn (1)->set_deleted (); } - else + else if (TARGET_CB_NEVER) output_asm_insn ("nop", 0); fprintf (asm_out_file, "\n"); } @@ -13053,42 +13192,155 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands, } /* Return the assembly code for INSN, which branches to OPERANDS[0] + if some equality condition is true. The condition is given by + OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of + OPERANDS[1]. OPERANDS[2] is the comparison's first operand; + OPERANDS[3] is the second operand and may be zero or a register. */ + +const char * +mips_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, + bool inverted_p) +{ + const char *branch[2]; + /* For a simple BNEZ or BEQZ microMIPSr3 branch. 
*/ + if (TARGET_MICROMIPS + && mips_isa_rev <= 5 + && operands[3] == const0_rtx + && get_attr_length (insn) <= 8) + { + if (mips_cb == MIPS_CB_OPTIMAL) + { + branch[!inverted_p] = "%*b%C1z%:\t%2,%0"; + branch[inverted_p] = "%*b%N1z%:\t%2,%0"; + } + else + { + branch[!inverted_p] = "%*b%C1z\t%2,%0%/"; + branch[inverted_p] = "%*b%N1z\t%2,%0%/"; + } + } + else if (TARGET_CB_MAYBE) + { + if (operands[3] == const0_rtx) + { + branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0"); + branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0"); + } + else if (REGNO (operands[2]) != REGNO (operands[3])) + { + branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0"); + branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0"); + } + else + { + /* This case is degenerate. It should not happen, but does. */ + if (GET_CODE (operands[1]) == NE) + inverted_p = !inverted_p; + + branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0"); + branch[inverted_p] = "%*\t\t# branch never"; + } + } + else + { + branch[!inverted_p] = MIPS_BRANCH ("b%C1", "%2,%z3,%0"); + branch[inverted_p] = MIPS_BRANCH ("b%N1", "%2,%z3,%0"); + } + + return mips_output_conditional_branch (insn, operands, branch[1], branch[0]); +} + +/* Return the assembly code for INSN, which branches to OPERANDS[0] if some ordering condition is true. The condition is given by OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of OPERANDS[1]. OPERANDS[2] is the comparison's first operand; - its second is always zero. */ + OPERANDS[3] is the second operand and may be zero or a register. */ const char * -mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands, bool inverted_p) +mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + bool inverted_p) { const char *branch[2]; /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true. Make BRANCH[0] branch on the inverse condition. */ - switch (GET_CODE (operands[1])) + if (operands[3] != const0_rtx) { - /* These cases are equivalent to comparisons against zero. */ - case LEU: - inverted_p = !inverted_p; - /* Fall through. */ - case GTU: - branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0"); - branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0"); - break; + /* Handle degenerate cases that should not, but do, occur. */ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + switch (GET_CODE (operands[1])) + { + case LT: + case LTU: + inverted_p = !inverted_p; + /* Fall through. */ + case GE: + case GEU: + branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0"); + branch[inverted_p] = "%*\t\t# branch never"; + break; + default: + gcc_unreachable (); + } + } + else + { + branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0"); + branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0"); + } + } + else + { + switch (GET_CODE (operands[1])) + { + /* These cases are equivalent to comparisons against zero. */ + case LEU: + inverted_p = !inverted_p; + /* Fall through. */ + case GTU: + if (TARGET_CB_MAYBE) + { + branch[!inverted_p] = MIPS_BRANCH_C ("bnez", "%2,%0"); + branch[inverted_p] = MIPS_BRANCH_C ("beqz", "%2,%0"); + } + else + { + branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0"); + branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0"); + } + break; - /* These cases are always true or always false. */ - case LTU: - inverted_p = !inverted_p; - /* Fall through. */ - case GEU: - branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0"); - branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0"); - break; + /* These cases are always true or always false. 
*/ + case LTU: + inverted_p = !inverted_p; + /* Fall through. */ + case GEU: + if (TARGET_CB_MAYBE) + { + branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0"); + branch[inverted_p] = "%*\t\t# branch never"; + } + else + { + branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0"); + branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0"); + } + break; - default: - branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0"); - branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0"); - break; + default: + if (TARGET_CB_MAYBE) + { + branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0"); + branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0"); + } + else + { + branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0"); + branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0"); + } + break; + } } return mips_output_conditional_branch (insn, operands, branch[1], branch[0]); } @@ -13291,11 +13543,18 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands) at, oldval, inclusive_mask, NULL); tmp1 = at; } - mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL); + if (TARGET_CB_NEVER) + mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL); /* CMP = 0 [delay slot]. */ if (cmp) mips_multi_add_insn ("li\t%0,0", cmp, NULL); + + if (TARGET_CB_MAYBE && required_oldval == const0_rtx) + mips_multi_add_insn ("bnezc\t%0,2f", tmp1, NULL); + else if (TARGET_CB_MAYBE) + mips_multi_add_insn ("bnec\t%0,%1,2f", tmp1, required_oldval, NULL); + } /* $TMP1 = OLDVAL & EXCLUSIVE_MASK. */ @@ -13358,7 +13617,10 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands) be annulled. To ensure this behaviour unconditionally use a NOP in the delay slot for the branch likely case. */ - mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL); + if (TARGET_CB_MAYBE) + mips_multi_add_insn ("beqzc\t%0,1b", at, NULL); + else + mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL); /* if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot]. */ if (insn1 != SYNC_INSN1_MOVE && insn1 != SYNC_INSN1_LI && tmp3 != newval) @@ -16640,7 +16902,7 @@ mips_orphaned_high_part_p (mips_offset_table *htab, rtx_insn *insn) static void mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay, - rtx *delayed_reg, rtx lo_reg) + rtx *delayed_reg, rtx lo_reg, bool *fs_delay) { rtx pattern, set; int nops, ninsns; @@ -16666,6 +16928,15 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay, nops = 2 - *hilo_delay; else if (*delayed_reg != 0 && reg_referenced_p (*delayed_reg, pattern)) nops = 1; + /* If processing a forbidden slot hazard then a NOP is required if the + branch instruction was not in a sequence (as the sequence would + imply it is not actually a compact branch anyway) and the current + insn is not an inline asm, and can't go in a delay slot. */ + else if (*fs_delay && get_attr_can_delay (insn) == CAN_DELAY_NO + && GET_CODE (PATTERN (after)) != SEQUENCE + && GET_CODE (pattern) != ASM_INPUT + && asm_noperands (pattern) < 0) + nops = 1; else nops = 0; @@ -16678,12 +16949,18 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay, /* Set up the state for the next instruction. 
*/ *hilo_delay += ninsns; *delayed_reg = 0; + *fs_delay = false; if (INSN_CODE (insn) >= 0) switch (get_attr_hazard (insn)) { case HAZARD_NONE: break; + case HAZARD_FORBIDDEN_SLOT: + if (TARGET_CB_MAYBE) + *fs_delay = true; + break; + case HAZARD_HILO: *hilo_delay = 0; break; @@ -16707,6 +16984,7 @@ mips_reorg_process_insns (void) rtx_insn *insn, *last_insn, *subinsn, *next_insn; rtx lo_reg, delayed_reg; int hilo_delay; + bool fs_delay; /* Force all instructions to be split into their final form. */ split_all_insns_noflow (); @@ -16775,6 +17053,7 @@ mips_reorg_process_insns (void) hilo_delay = 2; delayed_reg = 0; lo_reg = gen_rtx_REG (SImode, LO_REGNUM); + fs_delay = false; /* Make a second pass over the instructions. Delete orphaned high-part relocations or turn them into NOPs. Avoid hazards @@ -16798,7 +17077,7 @@ mips_reorg_process_insns (void) INSN_CODE (subinsn) = CODE_FOR_nop; } mips_avoid_hazard (last_insn, subinsn, &hilo_delay, - &delayed_reg, lo_reg); + &delayed_reg, lo_reg, &fs_delay); } last_insn = insn; } @@ -16819,7 +17098,7 @@ mips_reorg_process_insns (void) else { mips_avoid_hazard (last_insn, insn, &hilo_delay, - &delayed_reg, lo_reg); + &delayed_reg, lo_reg, &fs_delay); last_insn = insn; } } @@ -17684,6 +17963,27 @@ mips_option_override (void) target_flags |= MASK_ODD_SPREG; } + if (!ISA_HAS_COMPACT_BRANCHES && mips_cb == MIPS_CB_ALWAYS) + { + error ("unsupported combination: %qs%s %s", + mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "", + "-mcompact-branches=always"); + } + else if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER) + { + error ("unsupported combination: %qs%s %s", + mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "", + "-mcompact-branches=never"); + } + + /* Require explicit relocs for MIPS R6 onwards. This enables simplification + of the compact branch and jump support through the backend. */ + if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6) + { + error ("unsupported combination: %qs %s", + mips_arch_info->name, "-mno-explicit-relocs"); + } + /* The effect of -mabicalls isn't defined for the EABI. */ if (mips_abi == ABI_EABI && TARGET_ABICALLS) { @@ -18703,6 +19003,18 @@ mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) #undef OP + /* If we are using compact branches we don't have delay slots so + place the instruction that was in the delay slot before the JRC + instruction. */ + + if (TARGET_CB_ALWAYS) + { + rtx temp; + temp = trampoline[i-2]; + trampoline[i-2] = trampoline[i-1]; + trampoline[i-1] = temp; + } + /* Copy the trampoline code. Leave any padding uninitialized. */ for (j = 0; j < i; j++) { diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index da1de011fc9..25a1e0622cd 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -92,6 +92,33 @@ struct mips_cpu_info { /* True if we are generating position-independent VxWorks RTP code. */ #define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) +/* Compact branches must not be used if the user either selects the + 'never' policy or the 'optimal' policy on a core that lacks + compact branch instructions. */ +#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER \ + || (mips_cb == MIPS_CB_OPTIMAL \ + && !ISA_HAS_COMPACT_BRANCHES)) + +/* Compact branches may be used if the user either selects the + 'always' policy or the 'optimal' policy on a core that supports + compact branch instructions. 
*/ +#define TARGET_CB_MAYBE (TARGET_CB_ALWAYS \ + || (mips_cb == MIPS_CB_OPTIMAL \ + && ISA_HAS_COMPACT_BRANCHES)) + +/* Compact branches must always be generated if the user selects + the 'always' policy or the 'optimal' policy on a core that + lacks delay slot branch instructions. */ +#define TARGET_CB_ALWAYS (mips_cb == MIPS_CB_ALWAYS \ + || (mips_cb == MIPS_CB_OPTIMAL \ + && !ISA_HAS_DELAY_SLOTS)) + +/* Special handling for JRC that exists in microMIPSR3 as well as R6 + ISAs with full compact branch support. */ +#define ISA_HAS_JRC ((ISA_HAS_COMPACT_BRANCHES \ + || TARGET_MICROMIPS) \ + && mips_cb != MIPS_CB_NEVER) + /* True if the output file is marked as ".abicalls; .option pic0" (-call_nonpic). */ #define TARGET_ABICALLS_PIC0 \ @@ -872,6 +899,10 @@ struct mips_cpu_info { #define ISA_HAS_JR (mips_isa_rev <= 5) +#define ISA_HAS_DELAY_SLOTS 1 + +#define ISA_HAS_COMPACT_BRANCHES (mips_isa_rev >= 6) + /* ISA has branch likely instructions (e.g. mips2). */ /* Disable branchlikely for tx39 until compare rewrite. They haven't been generated up to this point. */ @@ -2645,6 +2676,9 @@ typedef struct mips_args { #define MIPS_BRANCH(OPCODE, OPERANDS) \ "%*" OPCODE "%?\t" OPERANDS "%/" +#define MIPS_BRANCH_C(OPCODE, OPERANDS) \ + "%*" OPCODE "%:\t" OPERANDS + /* Return an asm string that forces INSN to be treated as an absolute J or JAL instruction instead of an assembler macro. */ #define MIPS_ABSOLUTE_JUMP(INSN) \ @@ -2652,45 +2686,6 @@ typedef struct mips_args { ? ".option\tpic0\n\t" INSN "\n\t.option\tpic2" \ : INSN) -/* Return the asm template for a call. INSN is the instruction's mnemonic - ("j" or "jal"), OPERANDS are its operands, TARGET_OPNO is the operand - number of the target. SIZE_OPNO is the operand number of the argument size - operand that can optionally hold the call attributes. If SIZE_OPNO is not - -1 and the call is indirect, use the function symbol from the call - attributes to attach a R_MIPS_JALR relocation to the call. - - When generating GOT code without explicit relocation operators, - all calls should use assembly macros. Otherwise, all indirect - calls should use "jr" or "jalr"; we will arrange to restore $gp - afterwards if necessary. Finally, we can only generate direct - calls for -mabicalls by temporarily switching to non-PIC mode. - - For microMIPS jal(r), we try to generate jal(r)s when a 16-bit - instruction is in the delay slot of jal(r). */ -#define MIPS_CALL(INSN, OPERANDS, TARGET_OPNO, SIZE_OPNO) \ - (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \ - ? "%*" INSN "\t%" #TARGET_OPNO "%/" \ - : REG_P (OPERANDS[TARGET_OPNO]) \ - ? (mips_get_pic_call_symbol (OPERANDS, SIZE_OPNO) \ - ? ("%*.reloc\t1f,R_MIPS_JALR,%" #SIZE_OPNO "\n" \ - "1:\t" INSN "r\t%" #TARGET_OPNO "%/") \ - : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \ - ? "%*" INSN "r%!\t%" #TARGET_OPNO "%/" \ - : "%*" INSN "r\t%" #TARGET_OPNO "%/") \ - : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \ - ? MIPS_ABSOLUTE_JUMP ("%*" INSN "%!\t%" #TARGET_OPNO "%/") \ - : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #TARGET_OPNO "%/")) \ - -/* Similar to MIPS_CALL, but this is for MICROMIPS "j" to generate - "jrc" when nop is in the delay slot of "jr". */ - -#define MICROMIPS_J(INSN, OPERANDS, OPNO) \ - (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \ - ? "%*j\t%" #OPNO "%/" \ - : REG_P (OPERANDS[OPNO]) \ - ? "%*jr%:\t%" #OPNO \ - : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #OPNO "%/")) - /* Control the assembler format that we output. 
*/ @@ -2981,6 +2976,9 @@ while (0) #undef PTRDIFF_TYPE #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") +/* The minimum alignment of any expanded block move. */ +#define MIPS_MIN_MOVE_MEM_ALIGN 16 + /* The maximum number of bytes that can be copied by one iteration of a movmemsi loop; see mips_block_move_loop. */ #define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index a0079d5c974..1d1c42bf5f5 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -409,6 +409,15 @@ (eq_attr "sync_mem" "!none") (const_string "syncloop")] (const_string "unknown"))) +(define_attr "compact_form" "always,maybe,never" + (cond [(eq_attr "jal" "direct") + (const_string "always") + (eq_attr "jal" "indirect") + (const_string "maybe") + (eq_attr "type" "jump") + (const_string "maybe")] + (const_string "never"))) + ;; Mode for conversion types (fcvt) ;; I2S integer to float single (SI/DI to SF) ;; I2D integer to float double (SI/DI to DF) @@ -694,7 +703,7 @@ ;; DELAY means that the next instruction cannot read the result ;; of this one. HILO means that the next two instructions cannot ;; write to HI or LO. -(define_attr "hazard" "none,delay,hilo" +(define_attr "hazard" "none,delay,hilo,forbidden_slot" (cond [(and (eq_attr "type" "load,fpload,fpidxload") (match_test "ISA_HAS_LOAD_DELAY")) (const_string "delay") @@ -1045,21 +1054,37 @@ (nil) (eq_attr "can_delay" "yes")]) -;; Branches that don't have likely variants do not annul on false. +;; Branches that have delay slots and don't have likely variants do +;; not annul on false. (define_delay (and (eq_attr "type" "branch") (not (match_test "TARGET_MIPS16")) + (ior (match_test "TARGET_CB_NEVER") + (and (eq_attr "compact_form" "maybe") + (not (match_test "TARGET_CB_ALWAYS"))) + (eq_attr "compact_form" "never")) (eq_attr "branch_likely" "no")) [(eq_attr "can_delay" "yes") (nil) (nil)]) -(define_delay (eq_attr "type" "jump") +(define_delay (and (eq_attr "type" "jump") + (ior (match_test "TARGET_CB_NEVER") + (and (eq_attr "compact_form" "maybe") + (not (match_test "TARGET_CB_ALWAYS"))) + (eq_attr "compact_form" "never"))) [(eq_attr "can_delay" "yes") (nil) (nil)]) +;; Call type instructions should never have a compact form as the +;; type is only used for MIPS16 patterns. For safety put the compact +;; branch detection condition in anyway. 
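The delay-slot conditions in these define_delay blocks (including the call-type one that follows) all test the same three-way policy, built from the TARGET_CB_NEVER / TARGET_CB_MAYBE / TARGET_CB_ALWAYS macros this patch adds to mips.h. A minimal standalone sketch of how those three predicates resolve, using hypothetical plain-C stand-ins for the target macros and ISA flags (illustrative only, not the committed code):

#include <stdio.h>

/* Sketch of the -mcompact-branches= policy: 'optimal' defers to the
   ISA, using compact branches when the core has them and classic
   delay-slot branches otherwise.  Names mirror the new mips.h macros
   but are hypothetical stand-ins here.  */

enum mips_cb_setting { MIPS_CB_NEVER, MIPS_CB_OPTIMAL, MIPS_CB_ALWAYS };

struct isa { int has_compact_branches; int has_delay_slots; };

/* Compact branches must not be used; every branch keeps its delay slot.  */
static int cb_never (enum mips_cb_setting cb, struct isa i)
{
  return cb == MIPS_CB_NEVER
	 || (cb == MIPS_CB_OPTIMAL && !i.has_compact_branches);
}

/* Compact branches may be used where profitable.  */
static int cb_maybe (enum mips_cb_setting cb, struct isa i)
{
  return cb == MIPS_CB_ALWAYS
	 || (cb == MIPS_CB_OPTIMAL && i.has_compact_branches);
}

/* Compact branches must always be used; no delay slots are filled.  */
static int cb_always (enum mips_cb_setting cb, struct isa i)
{
  return cb == MIPS_CB_ALWAYS
	 || (cb == MIPS_CB_OPTIMAL && !i.has_delay_slots);
}

int main (void)
{
  /* An r6-style core per the new mips.h: delay slots still exist
     (ISA_HAS_DELAY_SLOTS is 1) and compact branches are available.  */
  struct isa r6 = { 1, 1 };
  printf ("optimal on r6: never=%d maybe=%d always=%d\n",
	  cb_never (MIPS_CB_OPTIMAL, r6),
	  cb_maybe (MIPS_CB_OPTIMAL, r6),
	  cb_always (MIPS_CB_OPTIMAL, r6));
  return 0;
}

Each define_delay then grants a delay slot only when this policy does not force the compact encoding: either cb_never holds, or the pattern's compact_form is "never", or it is "maybe" without cb_always in force.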
(define_delay (and (eq_attr "type" "call") - (eq_attr "jal_macro" "no")) + (eq_attr "jal_macro" "no") + (ior (match_test "TARGET_CB_NEVER") + (and (eq_attr "compact_form" "maybe") + (not (match_test "TARGET_CB_ALWAYS"))) + (eq_attr "compact_form" "never"))) [(eq_attr "can_delay" "yes") (nil) (nil)]) @@ -5813,25 +5838,29 @@ [(set (pc) (if_then_else (match_operator 1 "order_operator" - [(match_operand:GPR 2 "register_operand" "d") - (const_int 0)]) + [(match_operand:GPR 2 "register_operand" "d,d") + (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) (label_ref (match_operand 0 "" "")) (pc)))] "!TARGET_MIPS16" { return mips_output_order_conditional_branch (insn, operands, false); } - [(set_attr "type" "branch")]) + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) (define_insn "*branch_order<mode>_inverted" [(set (pc) (if_then_else (match_operator 1 "order_operator" - [(match_operand:GPR 2 "register_operand" "d") - (const_int 0)]) + [(match_operand:GPR 2 "register_operand" "d,d") + (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) (pc) (label_ref (match_operand 0 "" ""))))] "!TARGET_MIPS16" { return mips_output_order_conditional_branch (insn, operands, true); } - [(set_attr "type" "branch")]) + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) ;; Conditional branch on equality comparison. @@ -5844,20 +5873,10 @@ (label_ref (match_operand 0 "" "")) (pc)))] "!TARGET_MIPS16" -{ - /* For a simple BNEZ or BEQZ microMIPS branch. */ - if (TARGET_MICROMIPS - && operands[3] == const0_rtx - && get_attr_length (insn) <= 8) - return mips_output_conditional_branch (insn, operands, - "%*b%C1z%:\t%2,%0", - "%*b%N1z%:\t%2,%0"); - - return mips_output_conditional_branch (insn, operands, - MIPS_BRANCH ("b%C1", "%2,%z3,%0"), - MIPS_BRANCH ("b%N1", "%2,%z3,%0")); -} - [(set_attr "type" "branch")]) + { return mips_output_equal_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) (define_insn "*branch_equality<mode>_inverted" [(set (pc) @@ -5868,20 +5887,10 @@ (pc) (label_ref (match_operand 0 "" ""))))] "!TARGET_MIPS16" -{ - /* For a simple BNEZ or BEQZ microMIPS branch. 
*/ - if (TARGET_MICROMIPS - && operands[3] == const0_rtx - && get_attr_length (insn) <= 8) - return mips_output_conditional_branch (insn, operands, - "%*b%N0z%:\t%2,%1", - "%*b%C0z%:\t%2,%1"); - - return mips_output_conditional_branch (insn, operands, - MIPS_BRANCH ("b%N1", "%2,%z3,%0"), - MIPS_BRANCH ("b%C1", "%2,%z3,%0")); -} - [(set_attr "type" "branch")]) + { return mips_output_equal_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) ;; MIPS16 branches @@ -6176,11 +6185,22 @@ "!TARGET_MIPS16 && TARGET_ABSOLUTE_JUMPS" { if (get_attr_length (insn) <= 8) - return "%*b\t%l0%/"; + { + if (TARGET_CB_MAYBE) + return MIPS_ABSOLUTE_JUMP ("%*b%:\t%l0"); + else + return MIPS_ABSOLUTE_JUMP ("%*b\t%l0%/"); + } else - return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/"); + { + if (TARGET_CB_MAYBE && !final_sequence) + return MIPS_ABSOLUTE_JUMP ("%*bc\t%l0"); + else + return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/"); + } } - [(set_attr "type" "branch")]) + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe")]) (define_insn "*jump_pic" [(set (pc) @@ -6188,14 +6208,23 @@ "!TARGET_MIPS16 && !TARGET_ABSOLUTE_JUMPS" { if (get_attr_length (insn) <= 8) - return "%*b\t%l0%/"; + { + if (TARGET_CB_MAYBE) + return "%*b%:\t%l0"; + else + return "%*b\t%l0%/"; + } else { mips_output_load_label (operands[0]); - return "%*jr\t%@%/%]"; + if (TARGET_CB_MAYBE) + return "%*jr%:\t%@%]"; + else + return "%*jr\t%@%/%]"; } } - [(set_attr "type" "branch")]) + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe")]) ;; We need a different insn for the mips16, because a mips16 branch ;; does not have a delay slot. @@ -6242,12 +6271,9 @@ (define_insn "indirect_jump_<mode>" [(set (pc) (match_operand:P 0 "register_operand" "d"))] "" -{ - if (TARGET_MICROMIPS) - return "%*jr%:\t%0"; - else - return "%*j\t%0%/"; -} + { + return mips_output_jump (operands, 0, -1, false); + } [(set_attr "type" "jump") (set_attr "mode" "none")]) @@ -6291,12 +6317,9 @@ (match_operand:P 0 "register_operand" "d")) (use (label_ref (match_operand 1 "" "")))] "" -{ - if (TARGET_MICROMIPS) - return "%*jr%:\t%0"; - else - return "%*j\t%0%/"; -} + { + return mips_output_jump (operands, 0, -1, false); + } [(set_attr "type" "jump") (set_attr "mode" "none")]) @@ -6508,10 +6531,8 @@ [(any_return)] "" { - if (TARGET_MICROMIPS) - return "%*jr%:\t$31"; - else - return "%*j\t$31%/"; + operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + return mips_output_jump (operands, 0, -1, false); } [(set_attr "type" "jump") (set_attr "mode" "none")]) @@ -6522,12 +6543,10 @@ [(any_return) (use (match_operand 0 "pmode_register_operand" ""))] "" -{ - if (TARGET_MICROMIPS) - return "%*jr%:\t%0"; - else - return "%*j\t%0%/"; -} + { + operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + return mips_output_jump (operands, 0, -1, false); + } [(set_attr "type" "jump") (set_attr "mode" "none")]) @@ -6783,12 +6802,7 @@ [(call (mem:SI (match_operand 0 "call_insn_operand" "j,S")) (match_operand 1 "" ""))] "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" -{ - if (TARGET_MICROMIPS) - return MICROMIPS_J ("j", operands, 0); - else - return MIPS_CALL ("j", operands, 0, 1); -} + { return mips_output_jump (operands, 0, 1, false); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -6809,12 +6823,7 @@ (call (mem:SI (match_operand 1 "call_insn_operand" "j,S")) (match_operand 2 "" "")))] "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" -{ - if (TARGET_MICROMIPS) - return MICROMIPS_J 
("j", operands, 1); - else - return MIPS_CALL ("j", operands, 1, 2); -} + { return mips_output_jump (operands, 1, 2, false); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -6826,12 +6835,7 @@ (call (mem:SI (match_dup 1)) (match_dup 2)))] "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" -{ - if (TARGET_MICROMIPS) - return MICROMIPS_J ("j", operands, 1); - else - return MIPS_CALL ("j", operands, 1, 2); -} + { return mips_output_jump (operands, 1, 2, false); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -6887,7 +6891,10 @@ (match_operand 1 "" "")) (clobber (reg:SI RETURN_ADDR_REGNUM))] "" - { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, 1); } + { + return (TARGET_SPLIT_CALLS ? "#" + : mips_output_jump (operands, 0, 1, true)); + } "reload_completed && TARGET_SPLIT_CALLS" [(const_int 0)] { @@ -6902,7 +6909,7 @@ (clobber (reg:SI RETURN_ADDR_REGNUM)) (clobber (reg:SI 28))] "TARGET_SPLIT_CALLS" - { return MIPS_CALL ("jal", operands, 0, 1); } + { return mips_output_jump (operands, 0, 1, true); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -6916,7 +6923,10 @@ (const_int 1) (clobber (reg:SI RETURN_ADDR_REGNUM))] "" - { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, -1); } + { + return (TARGET_SPLIT_CALLS ? "#" + : mips_output_jump (operands, 0, -1, true)); + } "reload_completed && TARGET_SPLIT_CALLS" [(const_int 0)] { @@ -6933,7 +6943,7 @@ (clobber (reg:SI RETURN_ADDR_REGNUM)) (clobber (reg:SI 28))] "TARGET_SPLIT_CALLS" - { return MIPS_CALL ("jal", operands, 0, -1); } + { return mips_output_jump (operands, 0, -1, true); } [(set_attr "jal" "direct") (set_attr "jal_macro" "no")]) @@ -6956,7 +6966,10 @@ (match_operand 2 "" ""))) (clobber (reg:SI RETURN_ADDR_REGNUM))] "" - { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); } + { + return (TARGET_SPLIT_CALLS ? "#" + : mips_output_jump (operands, 1, 2, true)); + } "reload_completed && TARGET_SPLIT_CALLS" [(const_int 0)] { @@ -6974,7 +6987,7 @@ (clobber (reg:SI RETURN_ADDR_REGNUM)) (clobber (reg:SI 28))] "TARGET_SPLIT_CALLS" - { return MIPS_CALL ("jal", operands, 1, 2); } + { return mips_output_jump (operands, 1, 2, true); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -6986,7 +6999,10 @@ (const_int 1) (clobber (reg:SI RETURN_ADDR_REGNUM))] "" - { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, -1); } + { + return (TARGET_SPLIT_CALLS ? "#" + : mips_output_jump (operands, 1, -1, true)); + } "reload_completed && TARGET_SPLIT_CALLS" [(const_int 0)] { @@ -7005,7 +7021,7 @@ (clobber (reg:SI RETURN_ADDR_REGNUM)) (clobber (reg:SI 28))] "TARGET_SPLIT_CALLS" - { return MIPS_CALL ("jal", operands, 1, -1); } + { return mips_output_jump (operands, 1, -1, true); } [(set_attr "jal" "direct") (set_attr "jal_macro" "no")]) @@ -7019,7 +7035,10 @@ (match_dup 2))) (clobber (reg:SI RETURN_ADDR_REGNUM))] "" - { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); } + { + return (TARGET_SPLIT_CALLS ? 
"#" + : mips_output_jump (operands, 1, 2, true)); + } "reload_completed && TARGET_SPLIT_CALLS" [(const_int 0)] { @@ -7040,7 +7059,7 @@ (clobber (reg:SI RETURN_ADDR_REGNUM)) (clobber (reg:SI 28))] "TARGET_SPLIT_CALLS" - { return MIPS_CALL ("jal", operands, 1, 2); } + { return mips_output_jump (operands, 1, 2, true); } [(set_attr "jal" "indirect,direct") (set_attr "jal_macro" "no")]) @@ -7055,7 +7074,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -7411,7 +7430,7 @@ (clobber (reg:P PIC_FUNCTION_ADDR_REGNUM)) (clobber (reg:P RETURN_ADDR_REGNUM))] "HAVE_AS_TLS && TARGET_MIPS16" - { return MIPS_CALL ("jal", operands, 0, -1); } + { return mips_output_jump (operands, 0, -1, true); } [(set_attr "type" "call") (set_attr "insn_count" "3") (set_attr "mode" "<MODE>")]) @@ -7452,7 +7471,7 @@ (clobber (reg:P PIC_FUNCTION_ADDR_REGNUM)) (clobber (reg:P RETURN_ADDR_REGNUM))] "TARGET_HARD_FLOAT_ABI && TARGET_MIPS16" - { return MIPS_CALL ("jal", operands, 0, -1); } + { return mips_output_jump (operands, 0, -1, true); } [(set_attr "type" "call") (set_attr "insn_count" "3")]) @@ -7482,7 +7501,7 @@ (clobber (reg:P PIC_FUNCTION_ADDR_REGNUM)) (clobber (reg:P RETURN_ADDR_REGNUM))] "TARGET_HARD_FLOAT_ABI && TARGET_MIPS16" - { return MIPS_CALL ("jal", operands, 0, -1); } + { return mips_output_jump (operands, 0, -1, true); } [(set_attr "type" "call") (set_attr "insn_count" "3")]) diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 348c6e03f1e..84887d11623 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -418,3 +418,20 @@ Driver mload-store-pairs Target Report Var(TARGET_LOAD_STORE_PAIRS) Init(1) Enable load/store bonding. + +mcompact-branches= +Target RejectNegative JoinedOrMissing Var(mips_cb) Report Enum(mips_cb_setting) Init(MIPS_CB_OPTIMAL) +Specify the compact branch usage policy + +Enum +Name(mips_cb_setting) Type(enum mips_cb_setting) +Policies available for use with -mcompact-branches=: + +EnumValue +Enum(mips_cb_setting) String(never) Value(MIPS_CB_NEVER) + +EnumValue +Enum(mips_cb_setting) String(optimal) Value(MIPS_CB_OPTIMAL) + +EnumValue +Enum(mips_cb_setting) String(always) Value(MIPS_CB_ALWAYS) diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md index 4929c3dc27e..3259232bb89 100644 --- a/gcc/config/mips/predicates.md +++ b/gcc/config/mips/predicates.md @@ -475,7 +475,18 @@ (match_code "eq,ne,lt,ltu,ge,geu")) (define_predicate "order_operator" - (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")) + (match_code "lt,ltu,le,leu,ge,geu,gt,gtu") +{ + if (XEXP (op, 1) == const0_rtx) + return true; + + if (TARGET_CB_MAYBE + && (GET_CODE (op) == LT || GET_CODE (op) == LTU + || GET_CODE (op) == GE || GET_CODE (op) == GEU)) + return true; + + return false; +}) ;; For NE, cstore uses sltu instructions in which the first operand is $0. ;; This isn't possible in mips16 code. diff --git a/gcc/config/msp430/msp430.opt b/gcc/config/msp430/msp430.opt index 3fed8799822..e055f61069d 100644 --- a/gcc/config/msp430/msp430.opt +++ b/gcc/config/msp430/msp430.opt @@ -12,7 +12,7 @@ Specify the MCU to build for. 
mcpu= Target Report Joined RejectNegative Var(target_cpu) -Specify the ISA to build for: msp430, mdsp430x, msp430xv2 +Specify the ISA to build for: msp430, msp430x, msp430xv2 mlarge Target Report Mask(LARGE) RejectNegative diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c index 1e154c8412c..ba0454e537a 100644 --- a/gcc/config/nvptx/mkoffload.c +++ b/gcc/config/nvptx/mkoffload.c @@ -881,10 +881,10 @@ process (FILE *in, FILE *out) "extern \"C\" {\n" "#endif\n"); - fprintf (out, "extern void GOMP_offload_register" - " (const void *, int, const void *);\n"); - fprintf (out, "extern void GOMP_offload_unregister" - " (const void *, int, const void *);\n"); + fprintf (out, "extern void GOMP_offload_register_ver" + " (unsigned, const void *, int, const void *);\n"); + fprintf (out, "extern void GOMP_offload_unregister_ver" + " (unsigned, const void *, int, const void *);\n"); fprintf (out, "#ifdef __cplusplus\n" "}\n" @@ -894,15 +894,19 @@ process (FILE *in, FILE *out) fprintf (out, "static __attribute__((constructor)) void init (void)\n" "{\n" - " GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n" - " &target_data);\n" - "};\n", GOMP_DEVICE_NVIDIA_PTX); + " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__," + "%d/*NVIDIA_PTX*/, &target_data);\n" + "};\n", + GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX), + GOMP_DEVICE_NVIDIA_PTX); fprintf (out, "static __attribute__((destructor)) void fini (void)\n" "{\n" - " GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n" - " &target_data);\n" - "};\n", GOMP_DEVICE_NVIDIA_PTX); + " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__," + "%d/*NVIDIA_PTX*/, &target_data);\n" + "};\n", + GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX), + GOMP_DEVICE_NVIDIA_PTX); } static void diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index a3569670d62..e6853680078 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -1,4 +1,3 @@ - /* Target code for NVPTX. Copyright (C) 2014-2015 Free Software Foundation, Inc. Contributed by Bernd Schmidt <bernds@codesourcery.com> @@ -322,7 +321,8 @@ nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree de /* Declare argument types. */ if ((args != NULL_TREE - && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node)) + && !(TREE_CODE (args) == TREE_LIST + && TREE_VALUE (args) == void_type_node)) || is_main || return_in_mem || DECL_STATIC_CHAIN (decl)) @@ -406,8 +406,8 @@ walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy, mode = DFmode; } - mode = arg_promotion (mode); } + mode = arg_promotion (mode); while (count-- > 0) { i++; @@ -546,7 +546,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) else if (TYPE_MODE (result_type) != VOIDmode) { machine_mode mode = arg_promotion (TYPE_MODE (result_type)); - fprintf (file, ".reg%s %%retval;\n", + fprintf (file, "\t.reg%s %%retval;\n", nvptx_ptx_type_from_mode (mode, false)); } @@ -598,9 +598,11 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) sz = get_frame_size (); if (sz > 0 || cfun->machine->has_call_with_sc) { + int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT; + fprintf (file, "\t.reg.u%d %%frame;\n" - "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n", - BITS_PER_WORD, sz == 0 ? 1 : sz); + "\t.local.align %d .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n", + BITS_PER_WORD, alignment, sz == 0 ? 
1 : sz); fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n", BITS_PER_WORD); } @@ -616,10 +618,10 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl) walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl), true, return_in_mem); if (return_in_mem) - fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n", + fprintf (file, "\tld.param.u%d %%ar1, [%%in_ar1];\n", GET_MODE_BITSIZE (Pmode)); if (stdarg_p (fntype)) - fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n", + fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n", GET_MODE_BITSIZE (Pmode)); } @@ -726,6 +728,14 @@ nvptx_function_ok_for_sibcall (tree, tree) return false; } +/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */ + +static rtx +nvptx_get_drap_rtx (void) +{ + return NULL_RTX; +} + /* Implement the TARGET_CALL_ARGS hook. Record information about one argument to the next call. */ @@ -1908,7 +1918,7 @@ nvptx_reorg_subreg (void) { next = NEXT_INSN (insn); if (!NONDEBUG_INSN_P (insn) - || asm_noperands (insn) >= 0 + || asm_noperands (PATTERN (insn)) >= 0 || GET_CODE (PATTERN (insn)) == USE || GET_CODE (PATTERN (insn)) == CLOBBER) continue; @@ -2118,6 +2128,8 @@ nvptx_file_end (void) #define TARGET_LIBCALL_VALUE nvptx_libcall_value #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall +#undef TARGET_GET_DRAP_RTX +#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx #undef TARGET_SPLIT_COMPLEX_ARG #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true #undef TARGET_RETURN_IN_MEMORY diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index afe4fcdd361..60a922af93f 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -52,6 +52,8 @@ #define BIGGEST_ALIGNMENT 64 #define STRICT_ALIGNMENT 1 +#define MAX_STACK_ALIGNMENT (1024 * 8) + /* Copied from elf.h and other places. We'd otherwise use BIGGEST_ALIGNMENT and fail a number of testcases. */ #define MAX_OFILE_ALIGNMENT (32768 * 8) diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 7c2cb9c15bc..b857e53bb22 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1241,6 +1241,12 @@ (match_operand 1 "nvptx_register_operand")] "" { + /* The ptx documentation specifies an alloca intrinsic (for 32 bit + only) but notes it is not implemented. The assembler emits a + confused error message. Issue a blunt one now instead. */ + sorry ("target cannot support alloca."); + emit_insn (gen_nop ()); + DONE; if (TARGET_ABI64) emit_insn (gen_allocate_stack_di (operands[0], operands[1])); else diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index ad9289236ff..46fc0f5719c 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -8248,7 +8248,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 3ef6bc85ecd..1c00099c78d 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -208,6 +208,8 @@ #define vec_lvebx __builtin_vec_lvebx #define vec_lvehx __builtin_vec_lvehx #define vec_lvewx __builtin_vec_lvewx +#define vec_pmsum_be __builtin_vec_vpmsum +#define vec_shasigma_be __builtin_crypto_vshasigma /* Cell only intrinsics. 
*/ #ifdef __PPU__ #define vec_lvlx __builtin_vec_lvlx diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index ae74796849d..3edb4774e75 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -239,6 +239,25 @@ return INT_REGNO_P (REGNO (op)); }) +;; Like int_reg_operand, but don't return true for pseudo registers +(define_predicate "int_reg_operand_not_pseudo" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 0; + + return INT_REGNO_P (REGNO (op)); +}) + ;; Like int_reg_operand, but only return true for base registers (define_predicate "base_reg_operand" (match_operand 0 "int_reg_operand") @@ -883,12 +902,12 @@ (define_predicate "current_file_function_operand" (and (match_code "symbol_ref") (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op)) - && ((SYMBOL_REF_LOCAL_P (op) - && ((DEFAULT_ABI != ABI_AIX - && DEFAULT_ABI != ABI_ELFv2) - || !SYMBOL_REF_EXTERNAL_P (op))) - || (op == XEXP (DECL_RTL (current_function_decl), - 0)))"))) + && (SYMBOL_REF_LOCAL_P (op) + || op == XEXP (DECL_RTL (current_function_decl), 0)) + && !((DEFAULT_ABI == ABI_AIX + || DEFAULT_ABI == ABI_ELFv2) + && (SYMBOL_REF_EXTERNAL_P (op) + || SYMBOL_REF_WEAK (op)))"))) ;; Return 1 if this operand is a valid input for a move insn. (define_predicate "input_operand" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 7beddf64d1b..85082ec0ee2 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus) +BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) @@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum") BU_P8V_OVERLOAD_2 (VRLD, "vrld") BU_P8V_OVERLOAD_2 (VSLD, "vsld") BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index d45bc93b10a..5fc2b53adfe 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 
RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, @@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 9fd565286f2..03764aef740 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -53,6 +53,7 @@ | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_HTM \ | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_QUAD_MEMORY_ATOMIC \ @@ -78,6 +79,7 @@ | OPTION_MASK_DFP \ | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_FPRND \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 7262a151438..7be529fab49 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -54,6 +54,7 @@ extern const char *output_vec_const_move (rtx *); extern const char *rs6000_output_move_128bit (rtx *); extern bool rs6000_move_128bit_ok_p (rtx []); extern bool rs6000_split_128bit_ok_p (rtx []); +extern void rs6000_expand_float128_convert (rtx, rtx, bool); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2a969782f26..8107bec8e6e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX) error ("-mfloat128-software requires VSX support"); + /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 + support. If we only have ISA 2.06 support, and the user did not specify + the switch, leave it set to -1 so the movmisalign patterns are enabled, + but we don't enable the full vectorization support */ + if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) + TARGET_ALLOW_MOVMISALIGN = 1; + + else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) + { + if (TARGET_ALLOW_MOVMISALIGN > 0) + error ("-mallow-movmisalign requires -mvsx"); + + TARGET_ALLOW_MOVMISALIGN = 0; + } + + /* Determine when unaligned vector accesses are permitted, and when + they are preferred over masked Altivec loads. 
Note that if + TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then + TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is + not true. */ + if (TARGET_EFFICIENT_UNALIGNED_VSX) + { + if (!TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mvsx"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + + else if (!TARGET_ALLOW_MOVMISALIGN) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p) } } - /* Determine when unaligned vector accesses are permitted, and when - they are preferred over masked Altivec loads. Note that if - TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then - TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is - not true. */ - if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) { - if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8 - && TARGET_ALLOW_MOVMISALIGN != 0) - TARGET_EFFICIENT_UNALIGNED_VSX = 1; - else - TARGET_EFFICIENT_UNALIGNED_VSX = 0; - } - - if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8) - TARGET_ALLOW_MOVMISALIGN = 1; - /* Set the builtin mask of the various options used that could affect which builtins were used. In the past we used target_flags, but we've run out of bits, and some options like SPE and PAIRED are no longer in @@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) during expand. */ gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); - /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, + /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, V1TImode). */ if (mode == TImode || mode == V1TImode) { @@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from, { unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to]; unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from]; + bool to_float128_vector_p = FLOAT128_VECTOR_P (to); + bool from_float128_vector_p = FLOAT128_VECTOR_P (from); /* Don't allow 64-bit types to overlap with 128-bit types that take a single register under VSX because the scalar part of the register @@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from, IEEE floating point can't overlap, and neither can small values. 
*/ - if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode)) + if (to_float128_vector_p && from_float128_vector_p) + return false; + + else if (to_float128_vector_p || from_float128_vector_p) return true; /* TDmode in floating-mode registers must always go into a register @@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from, if (TARGET_E500_DOUBLE && ((((to) == DFmode) + ((from) == DFmode)) == 1 || (((to) == TFmode) + ((from) == TFmode)) == 1 + || (((to) == IFmode) + ((from) == IFmode)) == 1 + || (((to) == KFmode) + ((from) == KFmode)) == 1 || (((to) == DDmode) + ((from) == DDmode)) == 1 || (((to) == TDmode) + ((from) == TDmode)) == 1 || (((to) == DImode) + ((from) == DImode)) == 1)) @@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[]) return output_vec_const_move (operands); } - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, "\n===== Bad 128 bit move:\n"); - debug_rtx (gen_rtx_SET (dest, src)); - } - - gcc_unreachable (); + fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); } /* Validate a 128-bit move. */ @@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfeq_gpr (compare_result, op0, op1) : gen_cmptfeq_gpr (compare_result, op0, op1); @@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfgt_gpr (compare_result, op0, op1) : gen_cmptfgt_gpr (compare_result, op0, op1); @@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttflt_gpr (compare_result, op0, op1) : gen_cmptflt_gpr (compare_result, op0, op1); @@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfeq_gpr (compare_result2, op0, op1) : gen_cmptfeq_gpr (compare_result2, op0, op1); @@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) emit_insn (cmp); } + + /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2) + returns 0..15 that is laid out the same way as the PowerPC CR register + would for a normal floating point comparison. */ + else if (FLOAT128_IEEE_P (mode)) + { + rtx and_reg = gen_reg_rtx (SImode); + rtx dest = gen_reg_rtx (SImode); + rtx libfunc = optab_libfunc (cmp_optab, mode); + HOST_WIDE_INT mask_value = 0; + + /* Values that __cmpkf2 returns. */ +#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */ +#define PPC_CMP_EQUAL 0x2 /* a == b. */ +#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */ +#define PPC_CMP_LESS_THEN 0x8 /* a < b. 
*/ + + switch (code) + { + case EQ: + mask_value = PPC_CMP_EQUAL; + code = NE; + break; + + case NE: + mask_value = PPC_CMP_EQUAL; + code = EQ; + break; + + case GT: + mask_value = PPC_CMP_GREATER_THEN; + code = NE; + break; + + case GE: + mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL; + code = NE; + break; + + case LT: + mask_value = PPC_CMP_LESS_THEN; + code = NE; + break; + + case LE: + mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL; + code = NE; + break; + + case UNLE: + mask_value = PPC_CMP_GREATER_THEN; + code = EQ; + break; + + case UNLT: + mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL; + code = EQ; + break; + + case UNGE: + mask_value = PPC_CMP_LESS_THEN; + code = EQ; + break; + + case UNGT: + mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL; + code = EQ; + break; + + case UNEQ: + mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED; + code = NE; + break; + + case LTGT: + mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED; + code = EQ; + break; + + case UNORDERED: + mask_value = PPC_CMP_UNORDERED; + code = NE; + break; + + case ORDERED: + mask_value = PPC_CMP_UNORDERED; + code = EQ; + break; + + default: + gcc_unreachable (); + } + + gcc_assert (mask_value != 0); + and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2, + op0, mode, op1, mode); + + emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value))); + compare_result = gen_reg_rtx (CCmode); + comp_mode = CCmode; + + emit_insn (gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, dest, const0_rtx))); + } + else { /* Generate XLC-compatible TFmode compare as PARALLEL with extra CLOBBERs to match cmptf_internal2 pattern. */ if (comp_mode == CCFPmode && TARGET_XL_COMPAT - && GET_MODE (op0) == TFmode - && !TARGET_IEEEQUAD - && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128) + && FLOAT128_IBM_P (GET_MODE (op0)) + && TARGET_HARD_FLOAT && TARGET_FPRS) emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (10, gen_rtx_SET (compare_result, @@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) /* Some kinds of FP comparisons need an OR operation; under flag_finite_math_only we don't bother. */ if (FLOAT_MODE_P (mode) + && !FLOAT128_IEEE_P (mode) && !flag_finite_math_only && !(TARGET_HARD_FLOAT && !TARGET_FPRS) && (code == LE || code == GE @@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) } +/* Expand floating point conversion to/from __float128 and __ibm128. */ + +void +rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) +{ + machine_mode dest_mode = GET_MODE (dest); + machine_mode src_mode = GET_MODE (src); + convert_optab cvt = unknown_optab; + rtx libfunc = NULL_RTX; + rtx dest2; + + if (dest_mode == src_mode) + gcc_unreachable (); + + if (FLOAT128_IEEE_P (dest_mode)) + { + if (src_mode == SFmode + || src_mode == DFmode + || FLOAT128_IBM_P (src_mode)) + cvt = sext_optab; + + else if (GET_MODE_CLASS (src_mode) == MODE_INT) + cvt = (unsigned_p) ? ufloat_optab : sfloat_optab; + + else if (FLOAT128_IEEE_P (src_mode)) + emit_move_insn (dest, gen_lowpart (dest_mode, src)); + + else + gcc_unreachable (); + } + + else if (FLOAT128_IEEE_P (src_mode)) + { + if (dest_mode == SFmode + || dest_mode == DFmode + || FLOAT128_IBM_P (dest_mode)) + cvt = trunc_optab; + + else if (GET_MODE_CLASS (dest_mode) == MODE_INT) + cvt = (unsigned_p) ? 
ufix_optab : sfix_optab; + + else + gcc_unreachable (); + } + + else + gcc_unreachable (); + + gcc_assert (cvt != unknown_optab); + libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode); + gcc_assert (libfunc != NULL_RTX); + + dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src, + src_mode); + + gcc_assert (dest != NULL_RTX); + if (!rtx_equal_p (dest, dest2)) + emit_move_insn (dest, dest2); + + return; +} + /* Emit the RTL for an sISEL pattern. */ void @@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp) || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && decl && !DECL_EXTERNAL (decl) + && !DECL_WEAK (decl) && (*targetm.binds_local_p) (decl)) || (DEFAULT_ABI == ABI_V4 && (!TARGET_SECURE_PLT @@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "crypto", OPTION_MASK_CRYPTO, false, true }, { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, + { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, + false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, @@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base /* A nonzero value indicates what kind of special handling for this insn is required if doublewords are swapped. Undefined if is_swappable is not set. */ - unsigned int special_handling : 3; + unsigned int special_handling : 4; /* Set if the web represented by this entry cannot be optimized. */ unsigned int web_not_optimizable : 1; /* Set if this insn should be deleted. */ @@ -34800,7 +35001,9 @@ enum special_handling_values { SH_NOSWAP_LD, SH_NOSWAP_ST, SH_EXTRACT, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_CONCAT }; /* Union INSN with all insns containing definitions that reach USE. @@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special) *special = SH_EXTRACT; return 1; } + /* An XXPERMDI is ok if we adjust the lanes. Note that if the + XXPERMDI is a swap operation, it will be identified by + insn_is_swap_p and therefore we won't get here. */ + else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT + && (GET_MODE (XEXP (op, 0)) == V4DFmode + || GET_MODE (XEXP (op, 0)) == V4DImode) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 2 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT + && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT) + { + *special = SH_XXPERMDI; + return 1; + } else return 0; @@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, return 1; } + /* A concatenation of two doublewords is ok if we reverse the + order of the inputs. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_CONCAT + && (GET_MODE (SET_SRC (body)) == V2DFmode + || GET_MODE (SET_SRC (body)) == V2DImode)) + { + *special = SH_CONCAT; + return 1; + } + /* Otherwise check the operands for vector lane violations. */ return rtx_is_swappable_p (body, special); } @@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn) fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); } +/* Given OP that contains an XXPERMDI operation (that is not a doubleword + swap), reverse the order of the source operands and adjust the indices + of the source lanes to account for doubleword reversal. 
*/ +static void +adjust_xxpermdi (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx select = XEXP (set, 1); + rtx concat = XEXP (select, 0); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + rtx parallel = XEXP (select, 1); + int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); + int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); + int new_lane0 = 3 - lane1; + int new_lane1 = 3 - lane0; + XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); + XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a VEC_CONCAT operation of two doublewords, + reverse the order of those inputs. */ +static void +adjust_concat (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx concat = XEXP (set, 1); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); +} + /* The insn described by INSN_ENTRY[I] can be swapped, but only with special handling. Take care of that here. */ static void @@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) /* Change the lane on a direct-splat operation. */ adjust_splat (insn); break; + case SH_XXPERMDI: + /* Change the lanes on an XXPERMDI operation. */ + adjust_xxpermdi (insn); + break; + case SH_CONCAT: + /* Reverse the order of a concatenation operation. */ + adjust_concat (insn); + break; } } @@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("special:extract ", dump_file); else if (insn_entry[i].special_handling == SH_SPLAT) fputs ("special:splat ", dump_file); + else if (insn_entry[i].special_handling == SH_XXPERMDI) + fputs ("special:xxpermdi ", dump_file); + else if (insn_entry[i].special_handling == SH_CONCAT) + fputs ("special:concat ", dump_file); } if (insn_entry[i].web_not_optimizable) fputs ("unoptimizable ", dump_file); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 527ad985423..cfdb286a2cb 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -348,6 +348,8 @@ && TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128") + (IF "TARGET_FLOAT128") + (KF "TARGET_FLOAT128") (DD "TARGET_DFP") (TD "TARGET_DFP")]) @@ -365,9 +367,14 @@ (define_mode_iterator FMOVE32 [SF SD]) (define_mode_iterator FMOVE64 [DF DD]) (define_mode_iterator FMOVE64X [DI DF DD]) -(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") +(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128") + (IF "TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)") + (IF "FLOAT128_2REG_P (IFmode)") + (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) + ; Iterators for 128 bit types for direct move (define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") (V16QI "") @@ -376,7 +383,13 @@ (V4SF "") (V2DI "") (V2DF "") - (V1TI "")]) + (V1TI "") + (KF "") + (TF "") + (IF "")]) + +; Iterator for 128-bit VSX types for pack/unpack +(define_mode_iterator FMOVE128_VSX [V1TI KF]) ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") @@ -432,6 +445,25 @@ ; Iterator for just 
SF/DF (define_mode_iterator SFDF [SF DF]) +; Iterator for float128 floating conversions +(define_mode_iterator FLOAT128_SFDFTF [ + (SF "TARGET_FLOAT128") + (DF "TARGET_FLOAT128") + (TF "FLOAT128_IBM_P (TFmode)") + (IF "TARGET_FLOAT128")]) + +; Iterator for special 128-bit floating point. This is for non-default +; conversions, so TFmode is not used here. +(define_mode_iterator IFKF [IF KF]) + +; Iterator for 128-bit floating point that uses the IBM double-double format +(define_mode_iterator IBM128 [IF TF]) + +; Iterator for 128-bit floating point +(define_mode_iterator TFIFKF [(KF "TARGET_FLOAT128") + (IF "TARGET_FLOAT128") + (TF "TARGET_LONG_DOUBLE_128")]) + ; SF/DF suffix for traditional floating instructions (define_mode_attr Ftrad [(SF "s") (DF "")]) @@ -596,7 +628,7 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI]) + SF SD SI DF DD DI TI PTI KF IF TF]) ;; Start with fixed-point load and store insns. Here we put only the more @@ -3037,15 +3069,15 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "*and<mode>3_imm_dot_shifted" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") +(define_insn "*and<mode>3_imm_dot_shifted" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") (compare:CC (and:GPR - (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") - (match_operand:SI 4 "const_int_operand" "n,n")) - (match_operand:GPR 2 "const_int_operand" "n,n")) + (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:SI 4 "const_int_operand" "n")) + (match_operand:GPR 2 "const_int_operand" "n")) (const_int 0))) - (clobber (match_scratch:GPR 0 "=r,r"))] + (clobber (match_scratch:GPR 0 "=r"))] "logical_const_operand (GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4])), DImode) @@ -3054,23 +3086,10 @@ && rs6000_gen_cell_microcode" { operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4])); - if (which_alternative == 0) - return "andi%e2. %0,%1,%u2"; - else - return "#"; + return "andi%e2. %0,%1,%u2"; } - "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" - [(set (match_dup 0) - (and:GPR (lshiftrt:GPR (match_dup 1) - (match_dup 4)) - (match_dup 2))) - (set (match_dup 3) - (compare:CC (match_dup 0) - (const_int 0)))] - "" [(set_attr "type" "logical") - (set_attr "dot" "yes") - (set_attr "length" "4,8")]) + (set_attr "dot" "yes")]) (define_insn "and<mode>3_mask" @@ -3664,10 +3683,10 @@ ; an insert instruction, in many cases. 
(define_insn_and_split "*ior<mode>_mask" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") - (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") - (match_operand:GPR 2 "const_int_operand" "n")))] - "can_create_pseudo_p () - && !logical_const_operand (operands[2], <MODE>mode) + (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0") + (match_operand:GPR 2 "const_int_operand" "n"))) + (clobber (match_scratch:GPR 3 "=r"))] + "!logical_const_operand (operands[2], <MODE>mode) && rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)" "#" "&& 1" @@ -3682,7 +3701,8 @@ { int nb, ne; rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<MODE>mode); operands[4] = GEN_INT (ne); operands[5] = GEN_INT (~UINTVAL (operands[2])); } @@ -4216,19 +4236,18 @@ ;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in ;; builtins.c and optabs.c that are not correct for IBM long double ;; when little-endian. -(define_expand "signbittf2" +(define_expand "signbit<mode>2" [(set (match_dup 2) - (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" ""))) (set (match_dup 3) (subreg:DI (match_dup 2) 0)) (set (match_dup 4) (match_dup 5)) (set (match_operand:SI 0 "gpc_reg_operand" "") (match_dup 6))] - "!TARGET_IEEEQUAD + "FLOAT128_IBM_P (<MODE>mode) && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" + && (TARGET_FPRS || TARGET_E500_DOUBLE)" { operands[2] = gen_reg_rtx (DFmode); operands[3] = gen_reg_rtx (DImode); @@ -6402,9 +6421,10 @@ ;; problematical. Don't allow direct move for this case. (define_insn_and_split "*mov<mode>_64bit_dm" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))] + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 + && FLOAT128_2REG_P (<MODE>mode) && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -6427,9 +6447,12 @@ [(set_attr "length" "8,8,8,8,12,12,8")]) (define_insn_and_split "*mov<mode>_32bit" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r"))] + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64 + && (FLOAT128_2REG_P (<MODE>mode) + || int_reg_operand_not_pseudo (operands[0], <MODE>mode) + || int_reg_operand_not_pseudo (operands[1], <MODE>mode)) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" "#" @@ -6453,12 +6476,12 @@ (define_expand "extenddftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") (float_extend:TF (match_operand:DF 1 "input_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_extenddftf2 (operands[0], operands[1])); else 
emit_insn (gen_extenddftf2_fprs (operands[0], operands[1])); @@ -6507,25 +6530,34 @@ (define_expand "extendsftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") (float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - rtx tmp = gen_reg_rtx (DFmode); - emit_insn (gen_extendsfdf2 (tmp, operands[1])); - emit_insn (gen_extenddftf2 (operands[0], tmp)); + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else + { + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + } DONE; }) (define_expand "trunctfdf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" - "") +{ + if (TARGET_IEEEQUAD) + { + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; + } +}) (define_insn_and_split "trunctfdf2_internal1" [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d") @@ -6556,12 +6588,13 @@ (define_expand "trunctfsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1])); else emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1])); @@ -6612,10 +6645,12 @@ (define_expand "fix_trunctfsi2" [(set (match_operand:SI 0 "gpc_reg_operand" "") (fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1])); else emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1])); @@ -6663,20 +6698,73 @@ DONE; }) -(define_expand "negtf2" - [(set (match_operand:TF 0 "gpc_reg_operand" "") - (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" - "") +(define_expand "fix_trunctfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fixuns_trunctf<mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (unsigned_fix:SDI (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "floatditf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float:TF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "floatuns<mode>tf2" + [(set (match_operand:TF 0 
"nonimmediate_operand" "") + (unsigned_float:TF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "neg<mode>2" + [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "") + (neg:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" + " +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128) + emit_insn (gen_ieee_128bit_vsx_neg<mode>2 (operands[0], operands[1])); + else + { + rtx libfunc = optab_libfunc (neg_optab, <MODE>mode); + rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST, + <MODE>mode, 1, + operands[1], <MODE>mode); + + if (target && !rtx_equal_p (target, operands[0])) + emit_move_insn (operands[0], target); + } + DONE; + } +}") (define_insn "negtf2_internal" [(set (match_operand:TF 0 "gpc_reg_operand" "=d") (neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)" "* { if (REGNO (operands[0]) == REGNO (operands[1]) + 1) @@ -6687,16 +6775,29 @@ [(set_attr "type" "fp") (set_attr "length" "8")]) -(define_expand "abstf2" - [(set (match_operand:TF 0 "gpc_reg_operand" "") - (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" +(define_expand "abs<mode>2" + [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "") + (abs:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" " { - rtx label = gen_label_rtx (); + rtx label; + + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128) + { + emit_insn (gen_ieee_128bit_vsx_abs<mode>2 (operands[0], operands[1])); + DONE; + } + else + FAIL; + } + + label = gen_label_rtx (); if (TARGET_E500_DOUBLE) { if (flag_finite_math_only && !flag_trapping_math) @@ -6732,6 +6833,184 @@ operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word); }") + + +;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector +;; register + +(define_expand "ieee_128bit_negative_zero" + [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))] + "TARGET_FLOAT128" +{ + rtvec v = rtvec_alloc (16); + int i, high; + + for (i = 0; i < 16; i++) + RTVEC_ELT (v, i) = const0_rtx; + + high = (BYTES_BIG_ENDIAN) ? 0 : 15; + RTVEC_ELT (v, high) = GEN_INT (0x80); + + rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v)); + DONE; +}) + +;; IEEE 128-bit negate + +;; We have 2 insns here for negate and absolute value. The first uses +;; match_scratch so that phases like combine can recognize neg/abs as generic +;; insns, and second insn after the first split pass loads up the bit to +;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of +;; neg/abs to create the constant just once. 
+ +(define_insn_and_split "ieee_128bit_vsx_neg<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (neg:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_neg<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlxor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; IEEE 128-bit absolute value +(define_insn_and_split "ieee_128bit_vsx_abs<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (abs:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_abs<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; IEEE 128-bit negative absolute value +(define_insn_and_split "*ieee_128bit_vsx_nabs<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF + (abs:TFIFKF + (match_operand:TFIFKF 1 "register_operand" "wa")))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (abs:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF + (abs:TFIFKF + (match_operand:TFIFKF 1 "register_operand" "wa")))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; Float128 conversion functions. These expand to library function calls. 
+ +(define_expand "extend<FLOAT128_SFDFTF:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (float_extend:IFKF + (match_operand:FLOAT128_SFDFTF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "trunc<IFKF:mode><FLOAT128_SFDFTF:mode>2" + [(set (match_operand:FLOAT128_SFDFTF 0 "nonimmediate_operand" "") + (float_truncate:FLOAT128_SFDFTF + (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fix_trunc<IFKF:mode><SDI:mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fixuns_trunc<IFKF:mode><SDI:mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (unsigned_fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "float<SDI:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (float:KF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "floatuns<SDI:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (unsigned_float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + ;; Reload helper functions used by rs6000_secondary_reload. The patterns all ;; must have 3 arguments, and scratch register constraint must be a single @@ -9516,7 +9795,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -12134,7 +12413,10 @@ ;; Pack/unpack 128-bit floating point types that take 2 scalar registers ; Type of the 64-bit part when packing/unpacking 128-bit floating point types -(define_mode_attr FP128_64 [(TF "DF") (TD "DI")]) +(define_mode_attr FP128_64 [(TF "DF") + (IF "DF") + (TD "DI") + (KF "DI")]) (define_expand "unpack<mode>" [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "") @@ -12142,7 +12424,7 @@ [(match_operand:FMOVE128 1 "register_operand" "") (match_operand:QI 2 "const_0_to_1_operand" "")] UNSPEC_UNPACK_128BIT))] - "" + "FLOAT128_2REG_P (<MODE>mode)" "") (define_insn_and_split "unpack<mode>_dm" @@ -12151,7 +12433,7 @@ [(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r") (match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")] UNSPEC_UNPACK_128BIT))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 3))] @@ -12175,7 +12457,7 @@ [(match_operand:FMOVE128 1 "register_operand" "d,d") (match_operand:QI 2 "const_0_to_1_operand" "i,i")] UNSPEC_UNPACK_128BIT))] - "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE" + "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 3))] @@ -12199,7 +12481,7 @@ [(match_operand:<FP128_64> 1 "register_operand" "0,d") (match_operand:<FP128_64> 2 "register_operand" "d,d")] UNSPEC_PACK_128BIT))] - 
"" + "FLOAT128_2REG_P (<MODE>mode)" "@ fmr %L0,%2 #" @@ -12219,12 +12501,12 @@ [(set_attr "type" "fp,fp") (set_attr "length" "4,8")]) -(define_insn "unpackv1ti" +(define_insn "unpack<mode>" [(set (match_operand:DI 0 "register_operand" "=d,d") - (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa") + (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa") (match_operand:QI 2 "const_0_to_1_operand" "O,i")] UNSPEC_UNPACK_128BIT))] - "TARGET_VSX" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" { if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0) return ASM_COMMENT_START " xxpermdi to same register"; @@ -12232,19 +12514,17 @@ operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3); return "xxpermdi %x0,%x1,%x1,%3"; } - [(set_attr "type" "vecperm") - (set_attr "length" "4")]) + [(set_attr "type" "vecperm")]) -(define_insn "packv1ti" - [(set (match_operand:V1TI 0 "register_operand" "=wa") - (unspec:V1TI +(define_insn "pack<mode>" + [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa") + (unspec:FMOVE128_VSX [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_PACK_128BIT))] "TARGET_VSX" "xxpermdi %x0,%x1,%x2,0" - [(set_attr "type" "vecperm") - (set_attr "length" "4")]) + [(set_attr "type" "vecperm")]) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 18ea27a3d90..6d11ff7dfdb 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -212,7 +212,7 @@ Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save ; Allow/disallow the movmisalign in DF/DI vectors mefficient-unaligned-vector -Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1) +Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags) ; Consider unaligned VSX accesses to be efficient/inefficient mallow-df-permute diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h index 24618e309f1..f48af43e7c5 100644 --- a/gcc/config/rs6000/sysv4.h +++ b/gcc/config/rs6000/sysv4.h @@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) %{R*} \ %(link_shlib) \ %{!T*: %(link_start) } \ -%(link_target) \ %(link_os)" /* Shared libraries are not default. */ @@ -584,10 +583,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) %{shared:-G -dy -z text } \ %{symbolic:-Bsymbolic -G -dy -z text }" -/* Override the default target of the linker. */ -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT("", " --oformat elf32-powerpcle", "") - /* Any specific OS flags. 
*/ #define LINK_OS_SPEC "\ %{mads : %(link_os_ads) ; \ @@ -873,7 +868,6 @@ ncrtn.o%s" { "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \ { "endfile_default", ENDFILE_DEFAULT_SPEC }, \ { "link_shlib", LINK_SHLIB_SPEC }, \ - { "link_target", LINK_TARGET_SPEC }, \ { "link_start", LINK_START_SPEC }, \ { "link_start_ads", LINK_START_ADS_SPEC }, \ { "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \ diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h index 7b1d6a1b4de..66ee7cadfe4 100644 --- a/gcc/config/rs6000/sysv4le.h +++ b/gcc/config/rs6000/sysv4le.h @@ -25,10 +25,6 @@ #undef DEFAULT_ASM_ENDIAN #define DEFAULT_ASM_ENDIAN " -mlittle" -#undef LINK_TARGET_SPEC -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT(" --oformat elf32-powerpc", "", "") - #undef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" } diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 4a62fbbbdd4..8821dec5989 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -977,6 +977,8 @@ ;; General shift amounts can be supported using vsro + vsr. We're ;; not expecting to see these yet (the vectorizer currently ;; generates only shifts by a whole number of vector elements). +;; Note that the vec_shr operation is actually defined as +;; 'shift toward element 0' so is a shr for LE and shl for BE. (define_expand "vec_shr_<mode>" [(match_operand:VEC_L 0 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "") @@ -987,6 +989,7 @@ rtx bitshift = operands[2]; rtx shift; rtx insn; + rtx zero_reg, op1, op2; HOST_WIDE_INT bitshift_val; HOST_WIDE_INT byteshift_val; @@ -996,19 +999,29 @@ if (bitshift_val & 0x7) FAIL; byteshift_val = (bitshift_val >> 3); + zero_reg = gen_reg_rtx (<MODE>mode); + emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode)); if (!BYTES_BIG_ENDIAN) - byteshift_val = 16 - byteshift_val; + { + byteshift_val = 16 - byteshift_val; + op1 = zero_reg; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = zero_reg; + } + if (TARGET_VSX && (byteshift_val & 0x3) == 0) { shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); - insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift); } else { shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift); } emit_insn (insn); diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 8b124759b0b..6faf7719a9a 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -2315,7 +2315,7 @@ emit_move_insn (str1, force_operand (XEXP (operands[1], 0), NULL_RTX)); emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX)); - emit_move_insn (len, force_operand (operands[3], NULL_RTX)); + emit_move_insn (len, operands[3]); emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2])); DONE; diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index 0a24da9bcb1..b267b04e2a7 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -438,15 +438,15 @@ B_DEF (s390_vllezf, vec_insert_and_zerov4si,0, B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, BT_FN_UV2DI_ULONGLONGCONSTPTR) OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT) -OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U3, BT_OV_V16QI_SCHARCONSTPTR_USHORT) -OB_DEF_VAR 
(s390_vec_load_bndry_u8, s390_vlbb, O2_U3, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U3, BT_OV_V8HI_SHORTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U3, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U3, BT_OV_V4SI_INTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U3, BT_OV_UV4SI_UINTCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U3, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U3, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) -OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U3, BT_OV_V2DF_DBLCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT) +OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT) B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT) diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 5814694adbc..cbfc80073c9 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2258,23 +2258,14 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end) { unsigned HOST_WIDE_INT mask; int length, size; + rtx elt; - if (!VECTOR_MODE_P (GET_MODE (op)) - || GET_CODE (op) != CONST_VECTOR - || !CONST_INT_P (XVECEXP (op, 0, 0))) + if (!const_vec_duplicate_p (op, &elt) + || !CONST_INT_P (elt)) return false; - if (GET_MODE_NUNITS (GET_MODE (op)) > 1) - { - int i; - - for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i) - if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0))) - return false; - } - size = GET_MODE_UNIT_BITSIZE (GET_MODE (op)); - mask = UINTVAL (XVECEXP (op, 0, 0)); + mask = UINTVAL (elt); if (s390_contiguous_bitmask_p (mask, size, start, end != NULL ? &length : NULL)) { @@ -10360,6 +10351,7 @@ s390_emit_prologue (void) current_function_name(), cfun_frame_layout.frame_size, s390_stack_size); emit_insn (gen_trap ()); + emit_barrier (); } else { diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 5a552e2be81..3e4211be4de 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -159,6 +159,7 @@ extern int sh_eval_treg_value (rtx op); extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op); extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a); extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx); +extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool); /* Result value of sh_find_set_of_reg. 
*/ struct set_of_reg diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 450d634e246..1442b7fc790 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -1604,6 +1604,10 @@ sh_asm_output_addr_const_extra (FILE *file, rtx x) output_addr_const (file, XVECEXP (x, 0, 0)); fputs ("@GOTPLT", file); break; + case UNSPEC_PCREL: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@PCREL", file); + break; case UNSPEC_DTPOFF: output_addr_const (file, XVECEXP (x, 0, 0)); fputs ("@DTPOFF", file); @@ -10441,6 +10445,7 @@ nonpic_symbol_mentioned_p (rtx x) || XINT (x, 1) == UNSPEC_DTPOFF || XINT (x, 1) == UNSPEC_TPOFF || XINT (x, 1) == UNSPEC_PLT + || XINT (x, 1) == UNSPEC_PCREL || XINT (x, 1) == UNSPEC_SYMOFF || XINT (x, 1) == UNSPEC_PCREL_SYMOFF)) return false; @@ -10714,7 +10719,8 @@ sh_delegitimize_address (rtx orig_x) rtx symplt = XEXP (XVECEXP (y, 0, 0), 0); if (GET_CODE (symplt) == UNSPEC - && XINT (symplt, 1) == UNSPEC_PLT) + && (XINT (symplt, 1) == UNSPEC_PLT + || XINT (symplt, 1) == UNSPEC_PCREL)) return XVECEXP (symplt, 0, 0); } } @@ -11702,9 +11708,24 @@ sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) || crtl->args.info.stack_regs == 0) && ! sh_cfun_interrupt_handler_p () && (! flag_pic - || (decl && ! TREE_PUBLIC (decl)) + || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl))) || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT))); } + +/* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */ +void +sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p) +{ + const_tree decl = SYMBOL_REF_DECL (sym); + bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl)); + + if (!is_weak && SYMBOL_REF_LOCAL_P (sym)) + emit_insn (gen_sym_label2reg (reg, sym, lab)); + else if (sibcall_p) + emit_insn (gen_symPCREL_label2reg (reg, sym, lab)); + else + emit_insn (gen_symPLT_label2reg (reg, sym, lab)); +} /* Machine specific built-in functions. 
*/ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index ad49f72c68d..4e7cd169f84 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -135,6 +135,7 @@ UNSPEC_PLT UNSPEC_CALLER UNSPEC_GOTPLT + UNSPEC_PCREL UNSPEC_ICACHE UNSPEC_INIT_TRAMP UNSPEC_FCOSA @@ -9470,11 +9471,8 @@ label: [(const_int 0)] { rtx lab = PATTERN (gen_call_site ()); - - if (SYMBOL_REF_LOCAL_P (operands[0])) - emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); - else - emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab)); + + sh_expand_sym_label2reg (operands[2], operands[0], lab, false); emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab))); DONE; } @@ -9605,10 +9603,7 @@ label: { rtx lab = PATTERN (gen_call_site ()); - if (SYMBOL_REF_LOCAL_P (operands[1])) - emit_insn (gen_sym_label2reg (operands[3], operands[1], lab)); - else - emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab)); + sh_expand_sym_label2reg (operands[3], operands[1], lab, false); emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3], operands[2], copy_rtx (lab))); DONE; @@ -10008,7 +10003,7 @@ label: rtx lab = PATTERN (gen_call_site ()); rtx call_insn; - emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); + sh_expand_sym_label2reg (operands[2], operands[0], lab, true); call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1], copy_rtx (lab))); SIBLING_CALL_P (call_insn) = 1; @@ -10200,7 +10195,7 @@ label: rtx lab = PATTERN (gen_call_site ()); rtx call_insn; - emit_insn (gen_sym_label2reg (operands[3], operands[1], lab)); + sh_expand_sym_label2reg (operands[3], operands[1], lab, true); call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0], operands[3], operands[2], @@ -10748,6 +10743,16 @@ label: UNSPEC_SYMOFF)))] "TARGET_SH1" "") +(define_expand "symPCREL_label2reg" + [(set (match_operand:SI 0 "" "") + (const:SI + (unspec:SI + [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PCREL)) + (const:SI (plus:SI (match_operand:SI 2 "" "") + (const_int 2)))] UNSPEC_PCREL_SYMOFF)))] + "TARGET_SH1" + "") + (define_expand "symGOT_load" [(set (match_dup 2) (match_operand 1 "" "")) (set (match_dup 3) (plus (match_dup 2) (reg PIC_REG))) @@ -12731,7 +12736,7 @@ label: [(set (match_operand:SI 0 "register_operand") (compare:SI (match_operand:BLK 1 "memory_operand") (match_operand:BLK 2 "memory_operand"))) - (use (match_operand:SI 3 "immediate_operand")) + (use (match_operand:SI 3 "nonmemory_operand")) (use (match_operand:SI 4 "immediate_operand"))] "TARGET_SH1 && optimize" { diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 9665ee6da9b..5b9f0517b90 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -6403,7 +6403,7 @@ /* Pass constm1 to indicate that it may expect a structure value, but we don't know what size it is. */ - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, constm1_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, constm1_rtx)); /* Save the function value registers. 
*/ emit_move_insn (adjust_address (result, DImode, 0), valreg1); diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index ca762877a0f..05c81f5ed73 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -3185,11 +3185,8 @@ classify_immediate (rtx op, machine_mode mode) && mode == V4SImode && GET_CODE (op) == CONST_VECTOR && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT - && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) - && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2) - && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3)) - op = CONST_VECTOR_ELT (op, 0); + && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE) + op = unwrap_const_vec_duplicate (op); switch (GET_CODE (op)) { @@ -3507,9 +3504,7 @@ spu_legitimate_constant_p (machine_mode mode, rtx x) && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST)) - return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1) - && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2) - && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3); + return const_vec_duplicate_p (x); if (GET_CODE (x) == CONST_VECTOR && !const_vector_immediate_p (x)) diff --git a/gcc/config/tilegx/constraints.md b/gcc/config/tilegx/constraints.md index 783e1ca98fe..f47d0f68296 100644 --- a/gcc/config/tilegx/constraints.md +++ b/gcc/config/tilegx/constraints.md @@ -96,21 +96,14 @@ "An 8-element vector constant with identical elements" (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 8") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)"))) + (match_test "const_vec_duplicate_p (op)"))) (define_constraint "Y" "A 4-element vector constant with identical elements" (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 4") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))) + (match_test "const_vec_duplicate_p (op)"))) + (define_constraint "Z0" "The integer constant 0xffffffff" (and (match_code "const_int") diff --git a/gcc/config/tilegx/predicates.md b/gcc/config/tilegx/predicates.md index 4cbebf18a91..ce04660f9ed 100644 --- a/gcc/config/tilegx/predicates.md +++ b/gcc/config/tilegx/predicates.md @@ -112,14 +112,8 @@ (ior (match_operand 0 "register_operand") (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 8 - && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0)) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)")))) + && (satisfies_constraint_I + (unwrap_const_vec_duplicate (op)))")))) ;; Return 1 
if OP is a 4-element vector constant with identical signed ;; 8-bit elements or any register. @@ -127,10 +121,8 @@ (ior (match_operand 0 "register_operand") (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 4 - && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0)) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))) + && (satisfies_constraint_I + (unwrap_const_vec_duplicate (op)))")))) ;; Return 1 if the operand is a valid second operand to an add insn. (define_predicate "add_operand" diff --git a/gcc/config/tilegx/tilegx.md b/gcc/config/tilegx/tilegx.md index 75322e16721..944953c34b2 100644 --- a/gcc/config/tilegx/tilegx.md +++ b/gcc/config/tilegx/tilegx.md @@ -2670,7 +2670,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { diff --git a/gcc/config/tilepro/constraints.md b/gcc/config/tilepro/constraints.md index 4d13fb0640a..3ab9ab75650 100644 --- a/gcc/config/tilepro/constraints.md +++ b/gcc/config/tilepro/constraints.md @@ -90,12 +90,10 @@ "A 4-element vector constant with identical elements" (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 4") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))) + (match_test "const_vec_duplicate_p (op)"))) (define_constraint "Y" "A 2-element vector constant with identical elements" (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 2") - (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)"))) + (match_test "const_vec_duplicate_p (op)"))) diff --git a/gcc/config/tilepro/predicates.md b/gcc/config/tilepro/predicates.md index 00d2bb989cd..ab62d20731a 100644 --- a/gcc/config/tilepro/predicates.md +++ b/gcc/config/tilepro/predicates.md @@ -75,10 +75,8 @@ (ior (match_operand 0 "register_operand") (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 4 - && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0)) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))) + && (satisfies_constraint_I + (unwrap_const_vec_duplicate (op)))")))) ;; Return 1 if OP is a 2-element vector constant with identical signed ;; 8-bit elements or any register. @@ -86,8 +84,8 @@ (ior (match_operand 0 "register_operand") (and (match_code "const_vector") (match_test "CONST_VECTOR_NUNITS (op) == 2 - && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0)) - && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")))) + && (satisfies_constraint_I + (unwrap_const_vec_duplicate (op)))")))) ;; Return 1 if the operand is a valid second operand to an add insn. 
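(Editorial aside, not part of the patch: the const_vec_duplicate_p and unwrap_const_vec_duplicate helpers adopted in the s390, spu, tilegx and tilepro hunks above centralize the element-equality loop those targets previously spelled out by hand. A rough sketch of the test, using the CONST_VECTOR accessors visible in the hunks; all_elts_identical_p is a hypothetical name. The add_operand predicate introduced by the comment above continues after the sketch.)

/* Return true if every element of the CONST_VECTOR op matches element 0,
   i.e. the check that const_vec_duplicate_p encapsulates.  */
static bool
all_elts_identical_p (rtx op)
{
  for (int i = 1; i < CONST_VECTOR_NUNITS (op); i++)
    if (!rtx_equal_p (CONST_VECTOR_ELT (op, i), CONST_VECTOR_ELT (op, 0)))
      return false;
  return true;
}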
(define_predicate "add_operand" diff --git a/gcc/config/tilepro/tilepro.md b/gcc/config/tilepro/tilepro.md index a97ebf9eb22..b1e6b81e71f 100644 --- a/gcc/config/tilepro/tilepro.md +++ b/gcc/config/tilepro/tilepro.md @@ -1516,7 +1516,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { diff --git a/gcc/config/visium/visium.md b/gcc/config/visium/visium.md index 969cb887a6c..370b6a4b5b2 100644 --- a/gcc/config/visium/visium.md +++ b/gcc/config/visium/visium.md @@ -2375,7 +2375,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { |