summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2016-02-10 17:20:51 +0000
committerbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2016-02-10 17:20:51 +0000
commit2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config
parentc8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
downloadgcc-2d9d01985a7a7866916fafa19c5c296702e69714.tar.gz
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9 svn merge -r227001:227400 ^/trunk ; there is some gengtype issue before svn r228000... }} git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-builtins.c25
-rw-r--r--gcc/config/aarch64/aarch64-fusion-pairs.def18
-rw-r--r--gcc/config/aarch64/aarch64-option-extensions.def5
-rw-r--r--gcc/config/aarch64/aarch64-protos.h65
-rw-r--r--gcc/config/aarch64/aarch64-tuning-flags.def8
-rw-r--r--gcc/config/aarch64/aarch64.c290
-rw-r--r--gcc/config/aarch64/aarch64.h12
-rw-r--r--gcc/config/aarch64/aarch64.md182
-rw-r--r--gcc/config/aarch64/aarch64.opt19
-rw-r--r--gcc/config/aarch64/iterators.md6
-rw-r--r--gcc/config/aarch64/thunderx.md151
-rw-r--r--gcc/config/alpha/alpha.c2
-rw-r--r--gcc/config/alpha/alpha.md2
-rw-r--r--gcc/config/arm/arm-arches.def63
-rw-r--r--gcc/config/arm/arm-builtins.c71
-rw-r--r--gcc/config/arm/arm-cores.def202
-rw-r--r--gcc/config/arm/arm-protos.h110
-rw-r--r--gcc/config/arm/arm.c181
-rw-r--r--gcc/config/arm/arm.md5
-rw-r--r--gcc/config/arm/arm_neon.h10
-rw-r--r--gcc/config/arm/constraints.md5
-rw-r--r--gcc/config/avr/avr.c6
-rw-r--r--gcc/config/cr16/cr16.c2
-rw-r--r--gcc/config/i386/i386.c55
-rw-r--r--gcc/config/i386/i386.md178
-rw-r--r--gcc/config/i386/predicates.md22
-rw-r--r--gcc/config/iq2000/iq2000.md2
-rw-r--r--gcc/config/m32c/blkmov.md8
-rw-r--r--gcc/config/m68k/m68k.md2
-rw-r--r--gcc/config/microblaze/microblaze.c2
-rw-r--r--gcc/config/mips/mips-opts.h6
-rw-r--r--gcc/config/mips/mips-protos.h3
-rw-r--r--gcc/config/mips/mips.c402
-rw-r--r--gcc/config/mips/mips.h76
-rw-r--r--gcc/config/mips/mips.md215
-rw-r--r--gcc/config/mips/mips.opt17
-rw-r--r--gcc/config/mips/predicates.md13
-rw-r--r--gcc/config/msp430/msp430.opt2
-rw-r--r--gcc/config/nvptx/mkoffload.c24
-rw-r--r--gcc/config/nvptx/nvptx.c30
-rw-r--r--gcc/config/nvptx/nvptx.h2
-rw-r--r--gcc/config/nvptx/nvptx.md6
-rw-r--r--gcc/config/pa/pa.md2
-rw-r--r--gcc/config/rs6000/altivec.h2
-rw-r--r--gcc/config/rs6000/predicates.md31
-rw-r--r--gcc/config/rs6000/rs6000-builtin.def5
-rw-r--r--gcc/config/rs6000/rs6000-c.c21
-rw-r--r--gcc/config/rs6000/rs6000-cpus.def2
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c343
-rw-r--r--gcc/config/rs6000/rs6000.md458
-rw-r--r--gcc/config/rs6000/rs6000.opt2
-rw-r--r--gcc/config/rs6000/sysv4.h6
-rw-r--r--gcc/config/rs6000/sysv4le.h4
-rw-r--r--gcc/config/rs6000/vector.md23
-rw-r--r--gcc/config/rx/rx.md2
-rw-r--r--gcc/config/s390/s390-builtins.def18
-rw-r--r--gcc/config/s390/s390.c18
-rw-r--r--gcc/config/sh/sh-protos.h1
-rw-r--r--gcc/config/sh/sh.c25
-rw-r--r--gcc/config/sh/sh.md29
-rw-r--r--gcc/config/sparc/sparc.md2
-rw-r--r--gcc/config/spu/spu.c11
-rw-r--r--gcc/config/tilegx/constraints.md13
-rw-r--r--gcc/config/tilegx/predicates.md16
-rw-r--r--gcc/config/tilegx/tilegx.md2
-rw-r--r--gcc/config/tilepro/constraints.md6
-rw-r--r--gcc/config/tilepro/predicates.md10
-rw-r--r--gcc/config/tilepro/tilepro.md2
-rw-r--r--gcc/config/visium/visium.md2
70 files changed, 2524 insertions, 1038 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 0f4f2b97022..e3a90b5e4dd 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -886,30 +886,6 @@ typedef enum
SIMD_ARG_STOP
} builtin_simd_arg;
-/* Relayout the decl of a function arg. Keep the RTL component the same,
- as varasm.c ICEs. It doesn't like reinitializing the RTL
- on PARM decls. Something like this needs to be done when compiling a
- file without SIMD and then tagging a function with +simd and using SIMD
- intrinsics in there. The types will have been laid out assuming no SIMD,
- so we want to re-lay them out. */
-
-static void
-aarch64_relayout_simd_param (tree arg)
-{
- tree argdecl = arg;
- if (TREE_CODE (argdecl) == SSA_NAME)
- argdecl = SSA_NAME_VAR (argdecl);
-
- if (argdecl
- && (TREE_CODE (argdecl) == PARM_DECL
- || TREE_CODE (argdecl) == VAR_DECL))
- {
- rtx rtl = NULL_RTX;
- rtl = DECL_RTL_IF_SET (argdecl);
- relayout_decl (argdecl);
- SET_DECL_RTL (argdecl, rtl);
- }
-}
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
@@ -940,7 +916,6 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
{
tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
enum machine_mode mode = insn_data[icode].operand[opc].mode;
- aarch64_relayout_simd_param (arg);
op[opc] = expand_normal (arg);
switch (thisarg)
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index a7b00f6975d..53bbef46eb2 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -20,19 +20,17 @@
/* Pairs of instructions which can be fused. before including this file,
define a macro:
- AARCH64_FUSION_PAIR (name, internal_name, index_bit)
+ AARCH64_FUSION_PAIR (name, internal_name)
Where:
NAME is a string giving a friendly name for the instructions to fuse.
INTERNAL_NAME gives the internal name suitable for appending to
- AARCH64_FUSE_ to give an enum name.
- INDEX_BIT is the bit to set in the bitmask of supported fusion
- operations. */
-
-AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0)
-AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1)
-AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2)
-AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3)
-AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4)
+ AARCH64_FUSE_ to give an enum name. */
+
+AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK)
+AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD)
+AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
+AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
+AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1762cc8d58f..b261a0f7c3c 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -34,11 +34,6 @@
should contain a whitespace-separated list of the strings in 'Features'
that are required. Their order is not important. */
-/* V8 Architecture Extensions.
- This list currently contains example extensions for CPUs that implement
- AArch64, and therefore serves as a template for adding more CPUs in the
- future. */
-
AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp")
AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd")
AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 0b09d49f670..ff1985137b3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -73,8 +73,12 @@ enum aarch64_symbol_context
SYMBOL_SMALL_TLSGD
SYMBOL_SMALL_TLSDESC
- SYMBOL_SMALL_GOTTPREL
- SYMBOL_TLSLE
+ SYMBOL_SMALL_TLSIE
+ SYMBOL_TINY_TLSIE
+ SYMBOL_TLSLE12
+ SYMBOL_TLSLE24
+ SYMBOL_TLSLE32
+ SYMBOL_TLSLE48
Each of these represents a thread-local symbol, and corresponds to the
thread local storage relocation operator for the symbol being referred to.
@@ -108,10 +112,14 @@ enum aarch64_symbol_type
SYMBOL_SMALL_GOT_4G,
SYMBOL_SMALL_TLSGD,
SYMBOL_SMALL_TLSDESC,
- SYMBOL_SMALL_GOTTPREL,
+ SYMBOL_SMALL_TLSIE,
SYMBOL_TINY_ABSOLUTE,
SYMBOL_TINY_GOT,
- SYMBOL_TLSLE,
+ SYMBOL_TINY_TLSIE,
+ SYMBOL_TLSLE12,
+ SYMBOL_TLSLE24,
+ SYMBOL_TLSLE32,
+ SYMBOL_TLSLE48,
SYMBOL_FORCE_TO_MEM
};
@@ -201,41 +209,46 @@ struct tune_params
unsigned int extra_tuning_flags;
};
-#define AARCH64_FUSION_PAIR(x, name, index) \
- AARCH64_FUSE_##name = (1 << index),
+#define AARCH64_FUSION_PAIR(x, name) \
+ AARCH64_FUSE_##name##_index,
/* Supported fusion operations. */
-enum aarch64_fusion_pairs
+enum aarch64_fusion_pairs_index
{
- AARCH64_FUSE_NOTHING = 0,
#include "aarch64-fusion-pairs.def"
-
-/* Hacky macro to build AARCH64_FUSE_ALL. The sequence below expands
- to:
- AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */
+ AARCH64_FUSE_index_END
+};
#undef AARCH64_FUSION_PAIR
-#define AARCH64_FUSION_PAIR(x, name, y) \
- | AARCH64_FUSE_##name
- AARCH64_FUSE_ALL = 0
+#define AARCH64_FUSION_PAIR(x, name) \
+ AARCH64_FUSE_##name = (1u << AARCH64_FUSE_##name##_index),
+/* Supported fusion operations. */
+enum aarch64_fusion_pairs
+{
+ AARCH64_FUSE_NOTHING = 0,
#include "aarch64-fusion-pairs.def"
+ AARCH64_FUSE_ALL = (1u << AARCH64_FUSE_index_END) - 1
};
#undef AARCH64_FUSION_PAIR
-#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \
- AARCH64_EXTRA_TUNE_##name = (1 << index),
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+ AARCH64_EXTRA_TUNE_##name##_index,
+/* Supported tuning flags indexes. */
+enum aarch64_extra_tuning_flags_index
+{
+#include "aarch64-tuning-flags.def"
+ AARCH64_EXTRA_TUNE_index_END
+};
+#undef AARCH64_EXTRA_TUNING_OPTION
+
+
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+ AARCH64_EXTRA_TUNE_##name = (1u << AARCH64_EXTRA_TUNE_##name##_index),
/* Supported tuning flags. */
enum aarch64_extra_tuning_flags
{
AARCH64_EXTRA_TUNE_NONE = 0,
#include "aarch64-tuning-flags.def"
-
-/* Hacky macro to build the "all" flag mask.
- Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc. */
-#undef AARCH64_EXTRA_TUNING_OPTION
-#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \
- | AARCH64_EXTRA_TUNE_##name
- AARCH64_EXTRA_TUNE_ALL = 0
-#include "aarch64-tuning-flags.def"
+ AARCH64_EXTRA_TUNE_ALL = (1u << AARCH64_EXTRA_TUNE_index_END) - 1
};
#undef AARCH64_EXTRA_TUNING_OPTION
@@ -310,12 +323,14 @@ rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
bool aarch64_simd_mem_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
rtx aarch64_tls_get_addr (void);
+std::string aarch64_get_extension_string_for_isa_flags (unsigned long);
tree aarch64_fold_builtin (tree, int, tree *, bool);
unsigned aarch64_dbx_register_number (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
void aarch64_cpu_cpp_builtins (cpp_reader *);
void aarch64_elf_asm_named_section (const char *, unsigned, tree);
+const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
void aarch64_err_no_fpadvsimd (machine_mode, const char *);
void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 01aaca83594..628386b5a1d 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -20,15 +20,13 @@
/* Additional control over certain tuning parameters. Before including
this file, define a macro:
- AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit)
+ AARCH64_EXTRA_TUNING_OPTION (name, internal_name)
Where:
NAME is a string giving a friendly name for the tuning flag.
INTERNAL_NAME gives the internal name suitable for appending to
- AARCH64_TUNE_ to give an enum name.
- INDEX_BIT is the bit to set in the bitmask of supported tuning
- flags. */
+ AARCH64_TUNE_ to give an enum name. */
-AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0)
+AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aa268aeff4d..bc612e47d4f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -150,7 +150,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
-static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
@@ -172,7 +171,7 @@ struct aarch64_flag_desc
unsigned int flag;
};
-#define AARCH64_FUSION_PAIR(name, internal_name, y) \
+#define AARCH64_FUSION_PAIR(name, internal_name) \
{ name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
@@ -183,7 +182,7 @@ static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
};
#undef AARCH64_FUION_PAIR
-#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
+#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
{ name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
@@ -587,6 +586,29 @@ static const char * const aarch64_condition_codes[] =
"hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
+/* Generate code to enable conditional branches in functions over 1 MiB. */
+const char *
+aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
+ const char * branch_format)
+{
+ rtx_code_label * tmp_label = gen_label_rtx ();
+ char label_buf[256];
+ char buffer[128];
+ ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
+ CODE_LABEL_NUMBER (tmp_label));
+ const char *label_ptr = targetm.strip_name_encoding (label_buf);
+ rtx dest_label = operands[pos_label];
+ operands[pos_label] = tmp_label;
+
+ snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
+ output_asm_insn (buffer, operands);
+
+ snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
+ operands[pos_label] = dest_label;
+ output_asm_insn (buffer, operands);
+ return "";
+}
+
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
@@ -931,7 +953,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
The generate instruction sequence for accessing global variable
is:
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
Only one instruction needed. But we must initialize
pic_offset_table_rtx properly. We generate initialize insn for
@@ -940,12 +962,12 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
The final instruction sequences will look like the following
for multiply global variables access.
- adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
+ adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
- ... */
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
+ ... */
rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
crtl->uses_pic_offset_table = 1;
@@ -1081,7 +1103,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
{
/* In ILP32, the mode of dest can be either SImode or DImode,
while the got entry is always of SImode size. The mode of
@@ -1115,14 +1137,43 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ case SYMBOL_TLSLE24:
+ case SYMBOL_TLSLE32:
+ case SYMBOL_TLSLE48:
{
+ machine_mode mode = GET_MODE (dest);
rtx tp = aarch64_load_tp (NULL);
- if (GET_MODE (dest) != Pmode)
- tp = gen_lowpart (GET_MODE (dest), tp);
+ if (mode != Pmode)
+ tp = gen_lowpart (mode, tp);
+
+ switch (type)
+ {
+ case SYMBOL_TLSLE12:
+ emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
+ (dest, tp, imm));
+ break;
+ case SYMBOL_TLSLE24:
+ emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
+ (dest, tp, imm));
+ break;
+ case SYMBOL_TLSLE32:
+ emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
+ (dest, imm));
+ emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+ (dest, dest, tp));
+ break;
+ case SYMBOL_TLSLE48:
+ emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
+ (dest, imm));
+ emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+ (dest, dest, tp));
+ break;
+ default:
+ gcc_unreachable ();
+ }
- emit_insn (gen_tlsle (dest, tp, imm));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -1131,6 +1182,31 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
emit_insn (gen_ldr_got_tiny (dest, imm));
return;
+ case SYMBOL_TINY_TLSIE:
+ {
+ machine_mode mode = GET_MODE (dest);
+ rtx tp = aarch64_load_tp (NULL);
+
+ if (mode == ptr_mode)
+ {
+ if (mode == DImode)
+ emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
+ else
+ {
+ tp = gen_lowpart (mode, tp);
+ emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
+ }
+ }
+ else
+ {
+ gcc_assert (mode == Pmode);
+ emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
+ }
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ return;
+ }
+
default:
gcc_unreachable ();
}
@@ -1661,10 +1737,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_SMALL_TLSGD:
case SYMBOL_SMALL_TLSDESC:
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
case SYMBOL_SMALL_GOT_28K:
case SYMBOL_SMALL_GOT_4G:
case SYMBOL_TINY_GOT:
+ case SYMBOL_TINY_TLSIE:
if (offset != const0_rtx)
{
gcc_assert(can_create_pseudo_p ());
@@ -1677,7 +1754,10 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_SMALL_ABSOLUTE:
case SYMBOL_TINY_ABSOLUTE:
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ case SYMBOL_TLSLE24:
+ case SYMBOL_TLSLE32:
+ case SYMBOL_TLSLE48:
aarch64_load_symref_appropriately (dest, imm, sty);
return;
@@ -4163,19 +4243,6 @@ aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
-static unsigned
-bit_count (unsigned HOST_WIDE_INT value)
-{
- unsigned count = 0;
-
- while (value)
- {
- count++;
- value &= value - 1;
- }
-
- return count;
-}
/* N Z C V. */
#define AARCH64_CC_V 1
@@ -4330,7 +4397,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
return;
}
- asm_fprintf (f, "%u", bit_count (INTVAL (x)));
+ asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
break;
case 'H':
@@ -4556,11 +4623,11 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":tlsdesc:");
break;
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
asm_fprintf (asm_out_file, ":gottprel:");
break;
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel:");
break;
@@ -4589,11 +4656,15 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
break;
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
asm_fprintf (asm_out_file, ":gottprel_lo12:");
break;
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ asm_fprintf (asm_out_file, ":tprel_lo12:");
+ break;
+
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
break;
@@ -4601,6 +4672,10 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":got:");
break;
+ case SYMBOL_TINY_TLSIE:
+ asm_fprintf (asm_out_file, ":gottprel:");
+ break;
+
default:
break;
}
@@ -4611,7 +4686,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
{
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel_hi12:");
break;
default:
@@ -7506,6 +7581,40 @@ aarch64_parse_one_override_token (const char* token,
return;
}
+/* A checking mechanism for the implementation of the tls size. */
+
+static void
+initialize_aarch64_tls_size (struct gcc_options *opts)
+{
+ if (aarch64_tls_size == 0)
+ aarch64_tls_size = 24;
+
+ switch (opts->x_aarch64_cmodel_var)
+ {
+ case AARCH64_CMODEL_TINY:
+ /* Both the default and maximum TLS size allowed under tiny is 1M which
+ needs two instructions to address, so we clamp the size to 24. */
+ if (aarch64_tls_size > 24)
+ aarch64_tls_size = 24;
+ break;
+ case AARCH64_CMODEL_SMALL:
+ /* The maximum TLS size allowed under small is 4G. */
+ if (aarch64_tls_size > 32)
+ aarch64_tls_size = 32;
+ break;
+ case AARCH64_CMODEL_LARGE:
+ /* The maximum TLS size allowed under large is 16E.
+ FIXME: 16E should be 64bit, we only support 48bit offset now. */
+ if (aarch64_tls_size > 48)
+ aarch64_tls_size = 48;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
/* Parse STRING looking for options in the format:
string :: option:string
option :: name=substring
@@ -7598,6 +7707,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
}
initialize_aarch64_code_model (opts);
+ initialize_aarch64_tls_size (opts);
aarch64_override_options_after_change_1 (opts);
}
@@ -7904,20 +8014,6 @@ initialize_aarch64_code_model (struct gcc_options *opts)
aarch64_cmodel = opts->x_aarch64_cmodel_var;
}
-/* Print to F the architecture features specified by ISA_FLAGS. */
-
-static void
-aarch64_print_extension (FILE *f, unsigned long isa_flags)
-{
- const struct aarch64_option_extension *opt = NULL;
-
- for (opt = all_extensions; opt->name != NULL; opt++)
- if ((isa_flags & opt->flags_on) == opt->flags_on)
- asm_fprintf (f, "+%s", opt->name);
-
- asm_fprintf (f, "\n");
-}
-
/* Implement TARGET_OPTION_SAVE. */
static void
@@ -7950,10 +8046,12 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
= aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
unsigned long isa_flags = ptr->x_aarch64_isa_flags;
const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (isa_flags);
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
- fprintf (file, "%*sselected arch = %s", indent, "", arch->name);
- aarch64_print_extension (file, isa_flags);
+ fprintf (file, "%*sselected arch = %s%s\n", indent, "",
+ arch->name, extension.c_str ());
}
static GTY(()) tree aarch64_previous_fndecl;
@@ -8013,6 +8111,23 @@ aarch64_set_current_function (tree fndecl)
= save_target_globals_default_opts ();
}
}
+
+ if (!fndecl)
+ return;
+
+ /* If we turned on SIMD make sure that any vector parameters are re-laid out
+ so that they use proper vector modes. */
+ if (TARGET_SIMD)
+ {
+ tree parms = DECL_ARGUMENTS (fndecl);
+ for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
+ {
+ if (TREE_CODE (parms) == PARM_DECL
+ && VECTOR_TYPE_P (TREE_TYPE (parms))
+ && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
+ relayout_decl (parms);
+ }
+ }
}
/* Enum describing the various ways we can handle attributes.
@@ -8683,10 +8798,26 @@ aarch64_classify_tls_symbol (rtx x)
return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
case TLS_MODEL_INITIAL_EXEC:
- return SYMBOL_SMALL_GOTTPREL;
+ switch (aarch64_cmodel)
+ {
+ case AARCH64_CMODEL_TINY:
+ case AARCH64_CMODEL_TINY_PIC:
+ return SYMBOL_TINY_TLSIE;
+ default:
+ return SYMBOL_SMALL_TLSIE;
+ }
case TLS_MODEL_LOCAL_EXEC:
- return SYMBOL_TLSLE;
+ if (aarch64_tls_size == 12)
+ return SYMBOL_TLSLE12;
+ else if (aarch64_tls_size == 24)
+ return SYMBOL_TLSLE24;
+ else if (aarch64_tls_size == 32)
+ return SYMBOL_TLSLE32;
+ else if (aarch64_tls_size == 48)
+ return SYMBOL_TLSLE48;
+ else
+ gcc_unreachable ();
case TLS_MODEL_EMULATED:
case TLS_MODEL_NONE:
@@ -9893,31 +10024,10 @@ sizetochar (int size)
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
- int i = 0;
- REAL_VALUE_TYPE r0, ri;
- rtx x0, xi;
-
- if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
- return false;
-
- x0 = CONST_VECTOR_ELT (x, 0);
- if (!CONST_DOUBLE_P (x0))
- return false;
-
- REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
-
- for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
- {
- xi = CONST_VECTOR_ELT (x, i);
- if (!CONST_DOUBLE_P (xi))
- return false;
-
- REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
- if (!REAL_VALUES_EQUAL (r0, ri))
- return false;
- }
-
- return aarch64_float_const_representable_p (x0);
+ rtx elt;
+ return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
+ && const_vec_duplicate_p (x, &elt)
+ && aarch64_float_const_representable_p (elt));
}
/* Return true for valid and false for invalid. */
@@ -10380,28 +10490,15 @@ aarch64_simd_dup_constant (rtx vals)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- bool all_same = true;
rtx x;
- int i;
-
- if (GET_CODE (vals) != CONST_VECTOR)
- return NULL_RTX;
-
- for (i = 1; i < n_elts; ++i)
- {
- x = CONST_VECTOR_ELT (vals, i);
- if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
- all_same = false;
- }
- if (!all_same)
+ if (!const_vec_duplicate_p (vals, &x))
return NULL_RTX;
/* We can load this constant by using DUP and a constant in a
single ARM register. This will be cheaper than a vector
load. */
- x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
+ x = copy_to_mode_reg (inner_mode, x);
return gen_rtx_VEC_DUPLICATE (mode, x);
}
@@ -10677,8 +10774,11 @@ aarch64_declare_function_name (FILE *stream, const char* name,
const struct processor *this_arch
= aarch64_get_arch (targ_options->x_explicit_arch);
- asm_fprintf (asm_out_file, "\t.arch %s", this_arch->name);
- aarch64_print_extension (asm_out_file, targ_options->x_aarch64_isa_flags);
+ unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (isa_flags);
+ asm_fprintf (asm_out_file, "\t.arch %s%s\n",
+ this_arch->name, extension.c_str ());
/* Print the cpu name we're tuning for in the comments, might be
useful to readers of the generated asm. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1be78fc16dd..1e5f5dbd4fa 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -887,18 +887,18 @@ extern enum aarch64_code_model aarch64_cmodel;
{"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
{"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" },
-#define BIG_LITTLE_SPEC \
- " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}"
+#define MCPU_TO_MARCH_SPEC \
+ " %{mcpu=*:-march=%:rewrite_mcpu(%{mcpu=*:%*})}"
extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
-#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \
+#define MCPU_TO_MARCH_SPEC_FUNCTIONS \
{ "rewrite_mcpu", aarch64_rewrite_mcpu },
#if defined(__aarch64__)
extern const char *host_detect_local_cpu (int argc, const char **argv);
# define EXTRA_SPEC_FUNCTIONS \
{ "local_cpu_detect", host_detect_local_cpu }, \
- BIG_LITTLE_CPU_SPEC_FUNCTIONS
+ MCPU_TO_MARCH_SPEC_FUNCTIONS
# define MCPU_MTUNE_NATIVE_SPECS \
" %{march=native:%<march=native %:local_cpu_detect(arch)}" \
@@ -906,11 +906,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
" %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
#else
# define MCPU_MTUNE_NATIVE_SPECS ""
-# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
+# define EXTRA_SPEC_FUNCTIONS MCPU_TO_MARCH_SPEC_FUNCTIONS
#endif
#define ASM_CPU_SPEC \
- BIG_LITTLE_SPEC
+ MCPU_TO_MARCH_SPEC
#define EXTRA_SPECS \
{ "asm_cpu_spec", ASM_CPU_SPEC }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 35255e91a95..25229824fb5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -90,6 +90,7 @@
UNSPEC_GOTSMALLPIC28K
UNSPEC_GOTSMALLTLS
UNSPEC_GOTTINYPIC
+ UNSPEC_GOTTINYTLS
UNSPEC_LD1
UNSPEC_LD2
UNSPEC_LD2_DUP
@@ -117,7 +118,10 @@
UNSPEC_ST4_LANE
UNSPEC_TLS
UNSPEC_TLSDESC
- UNSPEC_TLSLE
+ UNSPEC_TLSLE12
+ UNSPEC_TLSLE24
+ UNSPEC_TLSLE32
+ UNSPEC_TLSLE48
UNSPEC_USHL_2S
UNSPEC_VSTRUCTDUMMY
UNSPEC_SP_SET
@@ -181,6 +185,13 @@
(const_string "no")
] (const_string "yes")))
+;; Attribute that specifies whether we are dealing with a branch to a
+;; label that is far away, i.e. further away than the maximum/minimum
+;; representable in a signed 21-bits number.
+;; 0 :=: no
+;; 1 :=: yes
+(define_attr "far_branch" "" (const_int 0))
+
;; -------------------------------------------------------------------
;; Pipeline descriptions and scheduling
;; -------------------------------------------------------------------
@@ -308,8 +319,23 @@
(label_ref (match_operand 2 "" ""))
(pc)))]
""
- "b%m0\\t%l2"
- [(set_attr "type" "branch")]
+ {
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t");
+ else
+ return "b%m0\\t%l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
(define_expand "casesi"
@@ -488,9 +514,23 @@
(label_ref (match_operand 1 "" ""))
(pc)))]
""
- "<cbz>\\t%<w>0, %l1"
- [(set_attr "type" "branch")]
-
+ {
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
+ else
+ return "<cbz>\\t%<w>0, %l1";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
(define_insn "*tb<optab><mode>1"
@@ -506,8 +546,14 @@
{
if (get_attr_length (insn) == 8)
{
- operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 2, "Ltb",
+ "<inv_tb>\\t%<w>0, %1, ");
+ else
+ {
+ operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+ return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ }
}
else
return "<tbz>\t%<w>0, %1, %l2";
@@ -517,7 +563,13 @@
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
(lt (minus (match_dup 2) (pc)) (const_int 32764)))
(const_int 4)
- (const_int 8)))]
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
+
)
(define_insn "*cb<optab><mode>1"
@@ -530,12 +582,18 @@
{
if (get_attr_length (insn) == 8)
{
- char buf[64];
- uint64_t val = ((uint64_t ) 1)
- << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
- sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
- output_asm_insn (buf, operands);
- return "<bcond>\t%l1";
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 1, "Ltb",
+ "<inv_tb>\\t%<w>0, <sizem1>, ");
+ else
+ {
+ char buf[64];
+ uint64_t val = ((uint64_t) 1)
+ << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
+ sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
+ output_asm_insn (buf, operands);
+ return "<bcond>\t%l1";
+ }
}
else
return "<tbz>\t%<w>0, <sizem1>, %l1";
@@ -545,7 +603,12 @@
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
(lt (minus (match_dup 1) (pc)) (const_int 32764)))
(const_int 4)
- (const_int 8)))]
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
;; -------------------------------------------------------------------
@@ -768,7 +831,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -3923,6 +3986,16 @@
[(set_attr "type" "bfm")]
)
+(define_insn "*aarch64_bfi<GPI:mode><ALLX:mode>4"
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (zero_extend:GPI (match_operand:ALLX 3 "register_operand" "r")))]
+ "UINTVAL (operands[1]) <= <ALLX:sizen>"
+ "bfi\\t%<GPI:w>0, %<GPI:w>3, %2, %1"
+ [(set_attr "type" "bfm")]
+)
+
(define_insn "*extr_insv_lower_reg<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(match_operand 1 "const_int_operand" "n")
@@ -4512,31 +4585,72 @@
(set_attr "length" "8")]
)
-(define_expand "tlsle"
- [(set (match_operand 0 "register_operand" "=r")
- (unspec [(match_operand 1 "register_operand" "r")
- (match_operand 2 "aarch64_tls_le_symref" "S")]
- UNSPEC_TLSLE))]
+(define_insn "tlsie_tiny_<mode>"
+ [(set (match_operand:PTR 0 "register_operand" "=&r")
+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")
+ (match_operand:PTR 2 "register_operand" "r")]
+ UNSPEC_GOTTINYTLS))]
""
-{
- machine_mode mode = GET_MODE (operands[0]);
- emit_insn ((mode == DImode
- ? gen_tlsle_di
- : gen_tlsle_si) (operands[0], operands[1], operands[2]));
- DONE;
-})
+ "ldr\\t%<w>0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
-(define_insn "tlsle_<mode>"
+(define_insn "tlsie_tiny_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (zero_extend:DI
+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")
+ (match_operand:DI 2 "register_operand" "r")
+ ]
+ UNSPEC_GOTTINYTLS)))]
+ ""
+ "ldr\\t%w0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
+
+(define_insn "tlsle12_<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
- (unspec:P [(match_operand:P 1 "register_operand" "r")
- (match_operand 2 "aarch64_tls_le_symref" "S")]
- UNSPEC_TLSLE))]
+ (unspec:P [(match_operand:P 1 "register_operand" "r")
+ (match_operand 2 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE12))]
""
- "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+ "add\\t%<w>0, %<w>1, #%L2";
[(set_attr "type" "alu_sreg")
+ (set_attr "length" "4")]
+)
+
+(define_insn "tlsle24_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "register_operand" "r")
+ (match_operand 2 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE24))]
+ ""
+ "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+ [(set_attr "type" "multiple")
(set_attr "length" "8")]
)
+(define_insn "tlsle32_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE32))]
+ ""
+ "movz\\t%<w>0, #:tprel_g1:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
+
+(define_insn "tlsle48_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE48))]
+ ""
+ "movz\\t%<w>0, #:tprel_g2:%1\;movk\\t%<w>0, #:tprel_g1_nc:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "12")]
+)
+
(define_insn "tlsdesc_small_<mode>"
[(set (reg:PTR R0_REGNUM)
(unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 37c2c509fe2..8642bdb74f3 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -96,6 +96,25 @@ mtls-dialect=
Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) Save
Specify TLS dialect
+mtls-size=
+Target RejectNegative Joined Var(aarch64_tls_size) Enum(aarch64_tls_size)
+Specifies bit size of immediate TLS offsets. Valid values are 12, 24, 32, 48.
+
+Enum
+Name(aarch64_tls_size) Type(int)
+
+EnumValue
+Enum(aarch64_tls_size) String(12) Value(12)
+
+EnumValue
+Enum(aarch64_tls_size) String(24) Value(24)
+
+EnumValue
+Enum(aarch64_tls_size) String(32) Value(32)
+
+EnumValue
+Enum(aarch64_tls_size) String(48) Value(48)
+
march=
Target RejectNegative ToLower Joined Var(aarch64_arch_string)
-march=ARCH Use features of architecture ARCH
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b8a45d1d6ed..475aa6e6d37 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -817,9 +817,15 @@
;; Emit cbz/cbnz depending on comparison type.
(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
+;; Emit inverted cbz/cbnz depending on comparison type.
+(define_code_attr inv_cb [(eq "cbnz") (ne "cbz") (lt "cbz") (ge "cbnz")])
+
;; Emit tbz/tbnz depending on comparison type.
(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])
+;; Emit inverted tbz/tbnz depending on comparison type.
+(define_code_attr inv_tb [(eq "tbnz") (ne "tbz") (lt "tbz") (ge "tbnz")])
+
;; Max/min attributes.
(define_code_attr maxmin [(smax "max")
(smin "min")
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index 914daf33a5a..cf9636862f2 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -39,7 +39,7 @@
(define_insn_reservation "thunderx_shift" 1
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
+ (eq_attr "type" "bfm,extend,shift_imm,shift_reg,rbit,rev"))
"thunderx_pipe0 | thunderx_pipe1")
@@ -66,12 +66,18 @@
(eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
"thunderx_pipe1 + thunderx_mult")
-;; Multiply high instructions take an extra cycle and cause the muliply unit to
-;; be busy for an extra cycle.
+;; crcb,crch,crcw is 4 cycles and can only happen on pipe 1
-;(define_insn_reservation "thunderx_mul_high" 5
+(define_insn_reservation "thunderx_crc32" 4
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crc"))
+ "thunderx_pipe1 + thunderx_mult")
+
+;; crcx is 5 cycles and only happen on pipe 1
+;(define_insn_reservation "thunderx_crc64" 5
; (and (eq_attr "tune" "thunderx")
-; (eq_attr "type" "smull,umull"))
+; (eq_attr "type" "crc")
+; (eq_attr "mode" "DI"))
; "thunderx_pipe1 + thunderx_mult")
(define_insn_reservation "thunderx_div32" 22
@@ -97,6 +103,11 @@
(eq_attr "type" "store2"))
"thunderx_pipe0 + thunderx_pipe1")
+;; Prefetch are single issued
+;(define_insn_reservation "thunderx_prefetch" 1
+; (and (eq_attr "tune" "thunderx")
+; (eq_attr "type" "prefetch"))
+; "thunderx_pipe0 + thunderx_pipe1")
;; loads (and load pairs) from L1 take 3 cycles in pipe 0
(define_insn_reservation "thunderx_load" 3
@@ -121,10 +132,21 @@
(eq_attr "type" "fconsts,fconstd"))
"thunderx_pipe1")
-;; Moves between fp are 2 cycles including min/max/select/abs/neg
+;; Moves between fp are 2 cycles including min/max
(define_insn_reservation "thunderx_fmov" 2
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
+ (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
+ "thunderx_pipe1")
+
+;; ABS, and NEG are 1 cycle
+(define_insn_reservation "thunderx_fabs" 1
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fcsel" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "fcsel"))
"thunderx_pipe1")
(define_insn_reservation "thunderx_fmovgpr" 2
@@ -132,6 +154,11 @@
(eq_attr "type" "f_mrc, f_mcr"))
"thunderx_pipe1")
+(define_insn_reservation "thunderx_fcmp" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "thunderx_pipe1")
+
(define_insn_reservation "thunderx_fmul" 6
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
@@ -152,21 +179,21 @@
(eq_attr "type" "fsqrts"))
"thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
-(define_insn_reservation "thunderx_fsqrtd" 28
+(define_insn_reservation "thunderx_fsqrtd" 31
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "fsqrtd"))
- "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
;; The rounding conversion inside fp is 4 cycles
(define_insn_reservation "thunderx_frint" 4
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_rints,f_rintd"))
+ (eq_attr "type" "f_cvt,f_rints,f_rintd"))
"thunderx_pipe1")
;; Float to integer with a move from int to/from float is 6 cycles
(define_insn_reservation "thunderx_f_cvt" 6
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+ (eq_attr "type" "f_cvtf2i,f_cvti2f"))
"thunderx_pipe1")
;; FP/SIMD load/stores happen in pipe 0
@@ -184,9 +211,12 @@
"thunderx_pipe0+thunderx_pipe1")
;; FP/SIMD Stores takes one cycle in pipe 0
+;; ST1 with one registers either multiple structures or single structure is
+;; also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
+ (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
+ neon_store1_one_lane, neon_store1_one_lane_q"))
"thunderx_pipe0")
;; 64bit neon store pairs are single issue for one cycle
@@ -201,24 +231,38 @@
(eq_attr "type" "neon_store1_2reg_q"))
"(thunderx_pipe0 + thunderx_pipe1)*2")
+;; LD1R/LD1 (with a single struct) takes 6 cycles and issued in pipe0
+(define_insn_reservation "thunderx_neon_ld1" 6
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_load1_all_lanes"))
+ "thunderx_pipe0")
;; SIMD/NEON (q forms take an extra cycle)
+;; SIMD For ThunderX is 64bit wide,
-;; Thunder simd move instruction types - 2/3 cycles
+;; ThunderX simd move instruction types - 2/3 cycles
+;; ThunderX dup, ins is the same
+;; ThunderX SIMD fabs/fneg instruction types
(define_insn_reservation "thunderx_neon_move" 2
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
- neon_fp_compare_d, neon_move"))
+ neon_fp_compare_d, neon_move, neon_dup, \
+ neon_ins, neon_from_gp, neon_to_gp, \
+ neon_abs, neon_neg, \
+ neon_fp_neg_s, neon_fp_abs_s"))
"thunderx_pipe1 + thunderx_simd")
(define_insn_reservation "thunderx_neon_move_q" 3
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
- neon_fp_compare_d_q, neon_move_q"))
+ neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
+ neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
+ neon_abs_q, neon_neg_q, \
+ neon_fp_neg_s_q, neon_fp_neg_d_q, \
+ neon_fp_abs_s_q, neon_fp_abs_d_q"))
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
-
-;; Thunder simd simple/add instruction types - 4/5 cycles
+;; ThunderX simd simple/add instruction types - 4/5 cycles
(define_insn_reservation "thunderx_neon_add" 4
(and (eq_attr "tune" "thunderx")
@@ -227,7 +271,9 @@
neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
- neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
+ neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
+ neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
+ neon_fp_reduc_minmax_s"))
"thunderx_pipe1 + thunderx_simd")
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
@@ -240,13 +286,74 @@
neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
- neon_add_long, neon_sub_long"))
+ neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
+ neon_arith_acc_q, neon_rev_q, \
+ neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
+;; Multiplies (float and integer) and shifts and permutes (except for TBL) and float conversions
+;; are 6/7 cycles
+(define_insn_reservation "thunderx_neon_mult" 6
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
+ neon_mla_b, neon_mla_h, neon_mla_s, \
+ neon_mla_h_scalar, neon_mla_s_scalar, \
+ neon_ext, neon_shift_imm, neon_permute, \
+ neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
+ neon_sat_shift_reg, neon_shift_acc, \
+ neon_mul_b, neon_mul_h, neon_mul_s, \
+ neon_mul_h_scalar, neon_mul_s_scalar, \
+ neon_fp_mul_s_scalar, \
+ neon_fp_mla_s_scalar"))
+ "thunderx_pipe1 + thunderx_simd")
+
+(define_insn_reservation "thunderx_neon_mult_q" 7
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
+ neon_ext_q, neon_shift_imm_q, neon_permute_q, \
+ neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
+ neon_sat_shift_reg_q, neon_shift_acc_q, \
+ neon_shift_imm_long, \
+ neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
+ neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
+ neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
+ neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
+ neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; AES[ED] is 5 cycles
+(define_insn_reservation "thunderx_crypto_aese" 5
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crypto_aese"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
-;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
-(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
-(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
+;; AES{,I}MC is 3 cycles
+(define_insn_reservation "thunderx_crypto_aesmc" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crypto_aesmc"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes upper halve in the last cycle
+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+
+;; 64bit TBL is emulated and takes 160 cycles
+(define_insn_reservation "thunderx_tbl" 160
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_tbl1"))
+ "(thunderx_pipe1+thunderx_pipe0)*160")
+
+;; 128bit TBL is emulated and takes 320 cycles
+(define_insn_reservation "thunderx_tblq" 320
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_tbl1_q"))
+ "(thunderx_pipe1+thunderx_pipe0)*320")
;; Assume both pipes are needed for unknown and multiple-instruction
;; patterns.
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index ca07cc7052b..32bb36eec33 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -3110,7 +3110,7 @@ alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
}
tmp = gen_rtx_MEM (QImode, func);
- tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
+ tmp = emit_call_insn (gen_call_value (reg, tmp, const0_rtx,
const0_rtx, const0_rtx));
CALL_INSN_FUNCTION_USAGE (tmp) = usage;
RTL_CONST_CALL_P (tmp) = 1;
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 7626d3f0233..5068f60ad6c 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3646,7 +3646,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index 3dafaa5fbc8..ddf6c3c330f 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -23,39 +23,40 @@
The NAME is the name of the architecture, represented as a string
constant. The CORE is the identifier for a core representative of
- this architecture. ARCH is the architecture revision. FLAGS are
- the flags implied by the architecture.
+ this architecture. ARCH is the architecture revision. FLAGS is
+ the set of feature flags implied by the architecture.
genopt.sh assumes no whitespace up to the first "," in each entry. */
-ARM_ARCH("armv2", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv2a", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv3", arm6, 3, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3)
-ARM_ARCH("armv3m", arm7m, 3M, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M)
-ARM_ARCH("armv4", arm7tdmi, 4, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4)
+ARM_ARCH("armv2", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv2a", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv3", arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3))
+ARM_ARCH("armv3m", arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M))
+ARM_ARCH("armv4", arm7tdmi, 4, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4))
/* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
implementations that support it, so we will leave it out for now. */
-ARM_ARCH("armv4t", arm7tdmi, 4T, FL_CO_PROC | FL_FOR_ARCH4T)
-ARM_ARCH("armv5", arm10tdmi, 5, FL_CO_PROC | FL_FOR_ARCH5)
-ARM_ARCH("armv5t", arm10tdmi, 5T, FL_CO_PROC | FL_FOR_ARCH5T)
-ARM_ARCH("armv5e", arm1026ejs, 5E, FL_CO_PROC | FL_FOR_ARCH5E)
-ARM_ARCH("armv5te", arm1026ejs, 5TE, FL_CO_PROC | FL_FOR_ARCH5TE)
-ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6)
-ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J)
-ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K)
-ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z)
-ARM_ARCH("armv6kz", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ)
-ARM_ARCH("armv6zk", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ)
-ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2)
-ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M)
-ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M)
-ARM_ARCH("armv7", cortexa8, 7, FL_CO_PROC | FL_FOR_ARCH7)
-ARM_ARCH("armv7-a", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7A)
-ARM_ARCH("armv7ve", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7VE)
-ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
-ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
-ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
-ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
-ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)
-ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
-ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
+ARM_ARCH("armv4t", arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T))
+ARM_ARCH("armv5", arm10tdmi, 5, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5))
+ARM_ARCH("armv5t", arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5T))
+ARM_ARCH("armv5e", arm1026ejs, 5E, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5E))
+ARM_ARCH("armv5te", arm1026ejs, 5TE, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5TE))
+ARM_ARCH("armv6", arm1136js, 6, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6))
+ARM_ARCH("armv6j", arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6J))
+ARM_ARCH("armv6k", mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6K))
+ARM_ARCH("armv6z", arm1176jzs, 6Z, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6Z))
+ARM_ARCH("armv6kz", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ))
+ARM_ARCH("armv6zk", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ))
+ARM_ARCH("armv6t2", arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6T2))
+ARM_ARCH("armv6-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M))
+ARM_ARCH("armv6s-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M))
+ARM_ARCH("armv7", cortexa8, 7, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7))
+ARM_ARCH("armv7-a", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7A))
+ARM_ARCH("armv7ve", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7VE))
+ARM_ARCH("armv7-r", cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7R))
+ARM_ARCH("armv7-m", cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7M))
+ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7EM))
+ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A))
+ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A))
+ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT))
+ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2))
+
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 030d8d19441..4391f17c655 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -769,13 +769,6 @@ arm_init_simd_builtin_types (void)
int nelts = sizeof (arm_simd_types) / sizeof (arm_simd_types[0]);
tree tdecl;
- /* Initialize the HFmode scalar type. */
- arm_simd_floatHF_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
- layout_type (arm_simd_floatHF_type_node);
- (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
- "__builtin_neon_hf");
-
/* Poly types are a world of their own. In order to maintain legacy
ABI, they get initialized using the old interface, and don't get
an entry in our mangling table, consequently, they get default
@@ -823,6 +816,8 @@ arm_init_simd_builtin_types (void)
mangling. */
/* Continue with standard types. */
+ /* The __builtin_simd{64,128}_float16 types are kept private unless
+ we have a scalar __fp16 type. */
arm_simd_types[Float16x4_t].eltype = arm_simd_floatHF_type_node;
arm_simd_types[Float32x2_t].eltype = float_type_node;
arm_simd_types[Float32x4_t].eltype = float_type_node;
@@ -1106,10 +1101,11 @@ arm_init_neon_builtins (void)
#undef NUM_DREG_TYPES
#undef NUM_QREG_TYPES
-#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
+#define def_mbuiltin(FLAGS, NAME, TYPE, CODE) \
do \
{ \
- if ((MASK) & insn_flags) \
+ const arm_feature_set flags = FLAGS; \
+ if (ARM_FSET_CPU_SUBSET (flags, insn_flags)) \
{ \
tree bdecl; \
bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
@@ -1121,7 +1117,7 @@ arm_init_neon_builtins (void)
struct builtin_description
{
- const unsigned int mask;
+ const arm_feature_set features;
const enum insn_code icode;
const char * const name;
const enum arm_builtins code;
@@ -1132,11 +1128,13 @@ struct builtin_description
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, \
+ "__builtin_arm_" string, \
ARM_BUILTIN_##builtin, UNKNOWN, 0 },
#define IWMMXT2_BUILTIN(code, string, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), CODE_FOR_##code, \
+ "__builtin_arm_" string, \
ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
@@ -1219,10 +1217,12 @@ static const struct builtin_description bdesc_2arg[] =
IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, NULL, \
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
#define IWMMXT2_BUILTIN2(code, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+ { ARM_FSET_MAKE_CPU2 (FL_IWMMXT2), CODE_FOR_##code, NULL, \
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
@@ -1237,7 +1237,7 @@ static const struct builtin_description bdesc_2arg[] =
#define FP_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
+ {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
UNKNOWN, 0},
FP_BUILTIN (get_fpscr, GET_FPSCR)
@@ -1245,8 +1245,8 @@ static const struct builtin_description bdesc_2arg[] =
#undef FP_BUILTIN
#define CRC32_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
- UNKNOWN, 0},
+ {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, \
+ ARM_BUILTIN_##U, UNKNOWN, 0},
CRC32_BUILTIN (crc32b, CRC32B)
CRC32_BUILTIN (crc32h, CRC32H)
CRC32_BUILTIN (crc32w, CRC32W)
@@ -1256,9 +1256,9 @@ static const struct builtin_description bdesc_2arg[] =
#undef CRC32_BUILTIN
-#define CRYPTO_BUILTIN(L, U) \
- {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
- UNKNOWN, 0},
+#define CRYPTO_BUILTIN(L, U) \
+ {ARM_FSET_EMPTY, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, \
+ ARM_BUILTIN_CRYPTO_##U, UNKNOWN, 0},
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
@@ -1514,7 +1514,9 @@ arm_init_iwmmxt_builtins (void)
machine_mode mode;
tree type;
- if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
+ if (d->name == 0 ||
+ !(ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT) ||
+ ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT2)))
continue;
mode = insn_data[d->icode].operand[1].mode;
@@ -1538,17 +1540,17 @@ arm_init_iwmmxt_builtins (void)
gcc_unreachable ();
}
- def_mbuiltin (d->mask, d->name, type, d->code);
+ def_mbuiltin (d->features, d->name, type, d->code);
}
/* Add the remaining MMX insns with somewhat more complicated types. */
#define iwmmx_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
+ def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT), "__builtin_arm_" NAME, \
+ (TYPE), ARM_BUILTIN_ ## CODE)
#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
+ def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), "__builtin_arm_" NAME, \
+ (TYPE), ARM_BUILTIN_ ## CODE)
iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
@@ -1702,10 +1704,12 @@ arm_init_iwmmxt_builtins (void)
static void
arm_init_fp16_builtins (void)
{
- tree fp16_type = make_node (REAL_TYPE);
- TYPE_PRECISION (fp16_type) = 16;
- layout_type (fp16_type);
- (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+ arm_simd_floatHF_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+ layout_type (arm_simd_floatHF_type_node);
+ if (arm_fp16_format)
+ (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
+ "__fp16");
}
static void
@@ -1750,12 +1754,13 @@ arm_init_builtins (void)
if (TARGET_REALLY_IWMMXT)
arm_init_iwmmxt_builtins ();
+ /* This creates the arm_simd_floatHF_type_node so must come before
+ arm_init_neon_builtins which uses it. */
+ arm_init_fp16_builtins ();
+
if (TARGET_NEON)
arm_init_neon_builtins ();
- if (arm_fp16_format)
- arm_init_fp16_builtins ();
-
if (TARGET_CRC32)
arm_init_crc32_builtins ();
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 9d47fcfbcd7..4c35200b3f8 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -33,7 +33,7 @@
The TUNE_IDENT is the name of the core for which scheduling decisions
should be made, represented as an identifier.
ARCH is the architecture revision implemented by the chip.
- FLAGS are the bitwise-or of the traits that apply to that core.
+ FLAGS is the set of feature flags of that core.
This need not include flags implied by the architecture.
COSTS is the name of the rtx_costs routine to use.
@@ -43,134 +43,134 @@
Some tools assume no whitespace up to the first "," in each entry. */
/* V2/V2A Architecture Processors */
-ARM_CORE("arm2", arm2, arm2, 2, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm250", arm250, arm250, 2, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm3", arm3, arm3, 2, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm2", arm2, arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
+ARM_CORE("arm250", arm250, arm250, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
+ARM_CORE("arm3", arm3, arm3, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
/* V3 Architecture Processors */
-ARM_CORE("arm6", arm6, arm6, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm60", arm60, arm60, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm600", arm600, arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm610", arm610, arm610, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm620", arm620, arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7", arm7, arm7, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm7d", arm7d, arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm7di", arm7di, arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm70", arm70, arm70, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm700", arm700, arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm700i", arm700i, arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm710", arm710, arm710, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm720", arm720, arm720, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm710c", arm710c, arm710c, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7100", arm7100, arm7100, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7500", arm7500, arm7500, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm6", arm6, arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm60", arm60, arm60, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm600", arm600, arm600, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm610", arm610, arm610, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm620", arm620, arm620, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7", arm7, arm7, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7d", arm7d, arm7d, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7di", arm7di, arm7di, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm70", arm70, arm70, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm700", arm700, arm700, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm700i", arm700i, arm700i, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm710", arm710, arm710, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm720", arm720, arm720, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm710c", arm710c, arm710c, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7100", arm7100, arm7100, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7500", arm7500, arm7500, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
/* Doesn't have an external co-proc, but does have embedded fpa. */
-ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
/* V3M Architecture Processors */
/* arm7m doesn't exist on its own, but only with D, ("and", and I), but
those don't alter the code, so arm7m is sometimes used. */
-ARM_CORE("arm7m", arm7m, arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul)
-ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul)
-ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul)
+ARM_CORE("arm7m", arm7m, arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
+ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
+ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
/* V4 Architecture Processors */
-ARM_CORE("arm8", arm8, arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("arm810", arm810, arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("strongarm", strongarm, strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm110", strongarm110, strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("fa526", fa526, fa526, 4, FL_LDSCHED, fastmul)
-ARM_CORE("fa626", fa626, fa626, 4, FL_LDSCHED, fastmul)
+ARM_CORE("arm8", arm8, arm8, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("arm810", arm810, arm810, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("strongarm", strongarm, strongarm, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm110", strongarm110, strongarm110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("fa526", fa526, fa526, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("fa626", fa626, fa626, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul)
/* V4T Architecture Processors */
-ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, FL_CO_PROC, fastmul)
-ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, FL_CO_PROC, fastmul)
-ARM_CORE("arm710t", arm710t, arm710t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm720t", arm720t, arm720t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm740t", arm740t, arm740t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm9", arm9, arm9, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm920", arm920, arm920, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm920t", arm920t, arm920t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm922t", arm922t, arm922t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm940t", arm940t, arm940t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("ep9312", ep9312, ep9312, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm710t", arm710t, arm710t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm720t", arm720t, arm720t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm740t", arm740t, arm740t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm9", arm9, arm9, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm920", arm920, arm920, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm920t", arm920t, arm920t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm922t", arm922t, arm922t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm940t", arm940t, arm940t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("ep9312", ep9312, ep9312, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
/* V5T Architecture Processors */
-ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, FL_LDSCHED, fastmul)
-ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, FL_LDSCHED, fastmul)
+ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul)
+ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul)
/* V5TE Architecture Processors */
-ARM_CORE("arm9e", arm9e, arm9e, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm10e", arm10e, arm10e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("xscale", xscale, xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
-ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
-ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale)
-ARM_CORE("fa606te", fa606te, fa606te, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fa626te", fa626te, fa626te, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fmp626", fmp626, fmp626, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fa726te", fa726te, fa726te, 5TE, FL_LDSCHED, fa726te)
+ARM_CORE("arm9e", arm9e, arm9e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm10e", arm10e, arm10e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("xscale", xscale, xscale, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2 | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("fa606te", fa606te, fa606te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fa626te", fa626te, fa626te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fmp626", fmp626, fmp626, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fa726te", fa726te, fa726te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fa726te)
/* V5TEJ Architecture Processors */
-ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, FL_LDSCHED, 9e)
-ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e)
+ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e)
+ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e)
/* V6 Architecture Processors */
-ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e)
-ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, FL_LDSCHED, 9e)
-ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e)
-ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2)
-ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6J), 9e)
+ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6J), 9e)
+ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6KZ), 9e)
+ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6KZ), 9e)
+ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6K), 9e)
+ARM_CORE("mpcore", mpcore, mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6K), 9e)
+ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6T2), v6t2)
+ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6T2), v6t2)
/* V6M Architecture Processors */
-ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, FL_LDSCHED, v6m)
-ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, FL_LDSCHED, v6m)
-ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, FL_LDSCHED, v6m)
+ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
/* V6M Architecture Processors for small-multiply implementations. */
-ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
-ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
-ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, FL_LDSCHED | FL_SMALLMUL, v6m)
+ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
/* V7 Architecture Processors */
-ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex)
-ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5)
-ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7)
-ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)
-ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
-ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
-ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
-ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, FL_LDSCHED, cortex)
-ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex)
-ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7)
-ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
-ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4)
+ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex)
+ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a5)
+ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a7)
+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a8)
+ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a9)
+ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
+ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15)
+ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
+ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_NO_VOLATILE_CE | FL_FOR_ARCH7EM), cortex_m7)
+ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7EM), v7m)
+ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7M), v7m)
+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), marvell_pj4)
/* V7 big.LITTLE implementations */
-ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
-ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
+ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15)
+ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
/* V8 Architecture Processors */
-ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53)
-ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("xgene1", xgene1, xgene1, 8A, FL_LDSCHED, xgene1)
+ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53)
+ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("xgene1", xgene1, xgene1, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8A), xgene1)
/* V8 big.LITTLE implementations */
-ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index cef9eec277d..8df312f3c67 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -319,6 +319,7 @@ extern int vfp3_const_double_for_bits (rtx);
extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
rtx);
+extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
#endif /* RTX_CODE */
@@ -346,6 +347,8 @@ extern bool arm_is_constant_pool_ref (rtx);
/* Flags used to identify the presence of processor capabilities. */
/* Bit values used to identify processor capabilities. */
+#define FL_NONE (0) /* No flags. */
+#define FL_ANY (0xffffffff) /* All flags. */
#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
#define FL_ARCH3M (1 << 1) /* Extended multiply */
#define FL_MODE26 (1 << 2) /* 26-bit mode support */
@@ -413,13 +416,116 @@ extern bool arm_is_constant_pool_ref (rtx);
#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
+/* There are too many feature bits to fit in a single word so the set of cpu and
+ fpu capabilities is a structure. A feature set is created and manipulated
+ with the ARM_FSET macros. */
+
+typedef struct
+{
+ unsigned long cpu[2];
+} arm_feature_set;
+
+
+/* Initialize a feature set. */
+
+#define ARM_FSET_MAKE(CPU1,CPU2) { { (CPU1), (CPU2) } }
+
+#define ARM_FSET_MAKE_CPU1(CPU1) ARM_FSET_MAKE ((CPU1), (FL_NONE))
+#define ARM_FSET_MAKE_CPU2(CPU2) ARM_FSET_MAKE ((FL_NONE), (CPU2))
+
+/* Accessors. */
+
+#define ARM_FSET_CPU1(S) ((S).cpu[0])
+#define ARM_FSET_CPU2(S) ((S).cpu[1])
+
+/* Useful combinations. */
+
+#define ARM_FSET_EMPTY ARM_FSET_MAKE (FL_NONE, FL_NONE)
+#define ARM_FSET_ANY ARM_FSET_MAKE (FL_ANY, FL_ANY)
+
+/* Tests for a specific CPU feature. */
+
+#define ARM_FSET_HAS_CPU1(A, F) \
+ (((A).cpu[0] & ((unsigned long)(F))) == ((unsigned long)(F)))
+#define ARM_FSET_HAS_CPU2(A, F) \
+ (((A).cpu[1] & ((unsigned long)(F))) == ((unsigned long)(F)))
+#define ARM_FSET_HAS_CPU(A, F1, F2) \
+ (ARM_FSET_HAS_CPU1 ((A), (F1)) && ARM_FSET_HAS_CPU2 ((A), (F2)))
+
+/* Add a feature to a feature set. */
+
+#define ARM_FSET_ADD_CPU1(DST, F) \
+ do { \
+ (DST).cpu[0] |= (F); \
+ } while (0)
+
+#define ARM_FSET_ADD_CPU2(DST, F) \
+ do { \
+ (DST).cpu[1] |= (F); \
+ } while (0)
+
+/* Remove a feature from a feature set. */
+
+#define ARM_FSET_DEL_CPU1(DST, F) \
+ do { \
+ (DST).cpu[0] &= ~(F); \
+ } while (0)
+
+#define ARM_FSET_DEL_CPU2(DST, F) \
+ do { \
+ (DST).cpu[1] &= ~(F); \
+ } while (0)
+
+/* Union of feature sets. */
+
+#define ARM_FSET_UNION(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; \
+ } while (0)
+
+/* Intersection of feature sets. */
+
+#define ARM_FSET_INTER(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; \
+ } while (0)
+
+/* Exclusive disjunction. */
+
+#define ARM_FSET_XOR(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] ^ (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] ^ (F2).cpu[1]; \
+ } while (0)
+
+/* Difference of feature sets: F1 excluding the elements of F2. */
+
+#define ARM_FSET_EXCLUDE(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; \
+ } while (0)
+
+/* Test for an empty feature set. */
+
+#define ARM_FSET_IS_EMPTY(A) \
+ (!((A).cpu[0]) && !((A).cpu[1]))
+
+/* Tests whether the cpu features of A are a subset of B. */
+
+#define ARM_FSET_CPU_SUBSET(A,B) \
+ ((((A).cpu[0] & (B).cpu[0]) == (A).cpu[0]) \
+ && (((A).cpu[1] & (B).cpu[1]) == (A).cpu[1]))
+
/* The bits in this mask specify which
instructions we are allowed to generate. */
-extern unsigned long insn_flags;
+extern arm_feature_set insn_flags;
/* The bits in this mask specify which instruction scheduling options should
be used. */
-extern unsigned long tune_flags;
+extern arm_feature_set tune_flags;
/* Nonzero if this chip supports the ARM Architecture 3M extensions. */
extern int arm_arch3m;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 57702cbc8de..fa4e083adfe 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -97,6 +97,7 @@ static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
+static unsigned feature_count (const arm_feature_set*);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
@@ -767,11 +768,11 @@ static int thumb_call_reg_needed;
/* The bits in this mask specify which
instructions we are allowed to generate. */
-unsigned long insn_flags = 0;
+arm_feature_set insn_flags = ARM_FSET_EMPTY;
/* The bits in this mask specify which instruction scheduling options should
be used. */
-unsigned long tune_flags = 0;
+arm_feature_set tune_flags = ARM_FSET_EMPTY;
/* The highest ARM architecture version supported by the
target. */
@@ -927,7 +928,7 @@ struct processors
enum processor_type core;
const char *arch;
enum base_architecture base_arch;
- const unsigned long flags;
+ const arm_feature_set flags;
const struct tune_params *const tune;
};
@@ -2196,10 +2197,10 @@ static const struct processors all_cores[] =
/* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
{NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
- FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
+ FLAGS, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
- {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
+ {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
static const struct processors all_architectures[] =
@@ -2212,7 +2213,7 @@ static const struct processors all_architectures[] =
{NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
- {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
+ {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
@@ -2278,6 +2279,14 @@ bit_count (unsigned long value)
return count;
}
+/* Return the number of features in feature-set SET. */
+static unsigned
+feature_count (const arm_feature_set * set)
+{
+ return (bit_count (ARM_FSET_CPU1 (*set))
+ + bit_count (ARM_FSET_CPU2 (*set)));
+}
+
typedef struct
{
machine_mode mode;
@@ -2703,7 +2712,7 @@ arm_option_check_internal (struct gcc_options *opts)
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
- if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
+ if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
error ("target CPU does not support ARM mode");
/* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
@@ -2803,7 +2812,8 @@ static void
arm_option_override_internal (struct gcc_options *opts,
struct gcc_options *opts_set)
{
- if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
+ if (TARGET_THUMB_P (opts->x_target_flags)
+ && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
{
warning (0, "target CPU does not support THUMB instructions");
opts->x_target_flags &= ~MASK_THUMB;
@@ -2890,8 +2900,13 @@ arm_option_override (void)
{
if (arm_selected_cpu)
{
+ const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
+ arm_feature_set selected_flags;
+ ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
+ arm_selected_arch->flags);
+ ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
/* Check for conflict between mcpu and march. */
- if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
+ if (!ARM_FSET_IS_EMPTY (selected_flags))
{
warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
arm_selected_cpu->name, arm_selected_arch->name);
@@ -2915,7 +2930,7 @@ arm_option_override (void)
if (!arm_selected_cpu)
{
const struct processors * sel;
- unsigned int sought;
+ arm_feature_set sought = ARM_FSET_EMPTY;;
arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
if (!arm_selected_cpu->name)
@@ -2935,26 +2950,27 @@ arm_option_override (void)
/* Now check to see if the user has specified some command line
switch that require certain abilities from the cpu. */
- sought = 0;
if (TARGET_INTERWORK || TARGET_THUMB)
{
- sought |= (FL_THUMB | FL_MODE32);
+ ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
+ ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
/* There are no ARM processors that support both APCS-26 and
interworking. Therefore we force FL_MODE26 to be removed
from insn_flags here (if it was set), so that the search
below will always be able to find a compatible processor. */
- insn_flags &= ~FL_MODE26;
+ ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
}
- if (sought != 0 && ((sought & insn_flags) != sought))
+ if (!ARM_FSET_IS_EMPTY (sought)
+ && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
{
/* Try to locate a CPU type that supports all of the abilities
of the default CPU, plus the extra abilities requested by
the user. */
for (sel = all_cores; sel->name != NULL; sel++)
- if ((sel->flags & sought) == (sought | insn_flags))
+ if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
break;
if (sel->name == NULL)
@@ -2974,19 +2990,23 @@ arm_option_override (void)
command line options we scan the array again looking
for a best match. */
for (sel = all_cores; sel->name != NULL; sel++)
- if ((sel->flags & sought) == sought)
- {
- unsigned count;
-
- count = bit_count (sel->flags & insn_flags);
-
- if (count >= current_bit_count)
- {
- best_fit = sel;
- current_bit_count = count;
- }
- }
+ {
+ arm_feature_set required = ARM_FSET_EMPTY;
+ ARM_FSET_UNION (required, sought, insn_flags);
+ if (ARM_FSET_CPU_SUBSET (required, sel->flags))
+ {
+ unsigned count;
+ arm_feature_set flags;
+ ARM_FSET_INTER (flags, sel->flags, insn_flags);
+ count = feature_count (&flags);
+ if (count >= current_bit_count)
+ {
+ best_fit = sel;
+ current_bit_count = count;
+ }
+ }
+ }
gcc_assert (best_fit);
sel = best_fit;
}
@@ -3014,7 +3034,8 @@ arm_option_override (void)
/* BPABI targets use linker tricks to allow interworking on cores
without thumb support. */
- if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
+ if (TARGET_INTERWORK
+ && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
{
warning (0, "target CPU does not support interworking" );
target_flags &= ~MASK_INTERWORK;
@@ -3039,34 +3060,34 @@ arm_option_override (void)
warning (0, "passing floating point arguments in fp regs not yet supported");
/* Initialize boolean versions of the flags, for use in the arm.md file. */
- arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
- arm_arch4 = (insn_flags & FL_ARCH4) != 0;
- arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
- arm_arch5 = (insn_flags & FL_ARCH5) != 0;
- arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
- arm_arch6 = (insn_flags & FL_ARCH6) != 0;
- arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
- arm_arch6kz = arm_arch6k && (insn_flags & FL_ARCH6KZ);
- arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
+ arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
+ arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
+ arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
+ arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
+ arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
+ arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
+ arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
+ arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
arm_arch6m = arm_arch6 && !arm_arch_notm;
- arm_arch7 = (insn_flags & FL_ARCH7) != 0;
- arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
- arm_arch8 = (insn_flags & FL_ARCH8) != 0;
- arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
- arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
-
- arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
- arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
- arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
- arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
- arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
- arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
- arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
- arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
- arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
+ arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
+ arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
+ arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
+ arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
+ arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
+
+ arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
+ arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
+ arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
+ arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
+ arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
+ arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
+ arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
+ arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
+ arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
- arm_arch_crc = (insn_flags & FL_CRC32) != 0;
- arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
+ arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
+ arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
/* V5 code we generate is completely interworking capable, so we turn off
TARGET_INTERWORK here to avoid many tests later on. */
@@ -3158,7 +3179,7 @@ arm_option_override (void)
/* For arm2/3 there is no need to do any scheduling if we are doing
software floating-point. */
- if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
+ if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
/* Use the cp15 method if it is available. */
@@ -12607,22 +12628,12 @@ neon_vdup_constant (rtx vals)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- bool all_same = true;
rtx x;
- int i;
if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
return NULL_RTX;
- for (i = 0; i < n_elts; ++i)
- {
- x = XVECEXP (vals, 0, i);
- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
- all_same = false;
- }
-
- if (!all_same)
+ if (!const_vec_duplicate_p (vals, &x))
/* The elements are not all the same. We could handle repeating
patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
{0, C, 0, C, 0, C, 0, C} which can be loaded using
@@ -12633,7 +12644,7 @@ neon_vdup_constant (rtx vals)
single ARM register. This will be cheaper than a vector
load. */
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ x = copy_to_mode_reg (inner_mode, x);
return gen_rtx_VEC_DUPLICATE (mode, x);
}
@@ -12809,10 +12820,10 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
if (lane < low || lane >= high)
{
if (exp)
- error ("%K%s %lld out of range %lld - %lld",
+ error ("%K%s %wd out of range %wd - %wd",
exp, desc, lane, low, high - 1);
else
- error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1);
+ error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
}
}
@@ -28674,6 +28685,38 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
#undef BRANCH
}
+/* Returns true if the pattern is a valid symbolic address, which is either a
+ symbol_ref or (symbol_ref + addend).
+
+ According to the ARM ELF ABI, the initial addend of REL-type relocations
+ processing MOVW and MOVT instructions is formed by interpreting the 16-bit
+ literal field of the instruction as a 16-bit signed value in the range
+ -32768 <= A < 32768. */
+
+bool
+arm_valid_symbolic_address_p (rtx addr)
+{
+ rtx xop0, xop1 = NULL_RTX;
+ rtx tmp = addr;
+
+ if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
+ return true;
+
+ /* (const (plus: symbol_ref const_int)) */
+ if (GET_CODE (addr) == CONST)
+ tmp = XEXP (addr, 0);
+
+ if (GET_CODE (tmp) == PLUS)
+ {
+ xop0 = XEXP (tmp, 0);
+ xop1 = XEXP (tmp, 1);
+
+ if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
+ return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
+ }
+
+ return false;
+}
/* Returns true if a valid comparison operation and makes
the operands in a form that is valid. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 288bbb9f836..b6c20478f9c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5774,7 +5774,7 @@
[(set (match_operand:SI 0 "nonimmediate_operand" "=r")
(lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0")
(match_operand:SI 2 "general_operand" "i")))]
- "arm_arch_thumb2"
+ "arm_arch_thumb2 && arm_valid_symbolic_address_p (operands[2])"
"movt%?\t%0, #:upper16:%c2"
[(set_attr "predicable" "yes")
(set_attr "predicable_short_it" "no")
@@ -8162,8 +8162,7 @@
size += GET_MODE_SIZE (GET_MODE (src));
}
- emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL,
- const0_rtx));
+ emit_call_insn (gen_call_value (par, operands[0], const0_rtx, NULL));
size = 0;
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index c923e294cda..2b30be61a46 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -41,7 +41,9 @@ typedef __simd64_int8_t int8x8_t;
typedef __simd64_int16_t int16x4_t;
typedef __simd64_int32_t int32x2_t;
typedef __builtin_neon_di int64x1_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
typedef __simd64_float16_t float16x4_t;
+#endif
typedef __simd64_float32_t float32x2_t;
typedef __simd64_poly8_t poly8x8_t;
typedef __simd64_poly16_t poly16x4_t;
@@ -6220,21 +6222,25 @@ vcvtq_u32_f32 (float32x4_t __a)
}
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
-
#endif
+#endif
+
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
-
#endif
+#endif
+
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
{
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 42935a4ca6d..e24858fe45e 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -21,7 +21,7 @@
;; The following register constraints have been used:
;; - in ARM/Thumb-2 state: t, w, x, y, z
;; - in Thumb state: h, b
-;; - in both states: l, c, k, q, US
+;; - in both states: l, c, k, q, Cs, Ts, US
;; In ARM state, 'l' is an alias for 'r'
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
@@ -67,7 +67,8 @@
(define_constraint "j"
"A constant suitable for a MOVW instruction. (ARM/Thumb-2)"
(and (match_test "TARGET_32BIT && arm_arch_thumb2")
- (ior (match_code "high")
+ (ior (and (match_code "high")
+ (match_test "arm_valid_symbolic_address_p (XEXP (op, 0))"))
(and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0")))))
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 3c477bc8eee..bec9a8bb788 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -9255,10 +9255,10 @@ avr_pgm_check_var_decl (tree node)
{
if (TYPE_P (node))
error ("%qT uses address space %qs beyond flash of %d KiB",
- node, avr_addrspace[as].name, avr_n_flash);
+ node, avr_addrspace[as].name, 64 * avr_n_flash);
else
error ("%s %q+D uses address space %qs beyond flash of %d KiB",
- reason, node, avr_addrspace[as].name, avr_n_flash);
+ reason, node, avr_addrspace[as].name, 64 * avr_n_flash);
}
else
{
@@ -9305,7 +9305,7 @@ avr_insert_attributes (tree node, tree *attributes)
if (avr_addrspace[as].segment >= avr_n_flash)
{
error ("variable %q+D located in address space %qs beyond flash "
- "of %d KiB", node, avr_addrspace[as].name, avr_n_flash);
+ "of %d KiB", node, avr_addrspace[as].name, 64 * avr_n_flash);
}
else if (!AVR_HAVE_LPM && avr_addrspace[as].pointer_size > 2)
{
diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c
index 8185b59b282..7b3b6efd3db 100644
--- a/gcc/config/cr16/cr16.c
+++ b/gcc/config/cr16/cr16.c
@@ -583,7 +583,7 @@ cr16_function_arg (cumulative_args_t cum_v, machine_mode mode,
/* function_arg () is called with this type just after all the args have
had their registers assigned. The rtx that function_arg returns from
this type is supposed to pass to 'gen_call' but currently it is not
- implemented (see macro GEN_CALL). */
+ implemented. */
if (type == void_type_node)
return NULL_RTX;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e10ebf..c69c738caa0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -436,7 +436,7 @@ struct processor_costs iamcu_cost = {
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
- 6, /* MOVE_RATIO */
+ 9, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
@@ -25531,7 +25531,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
- emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
@@ -39510,60 +39510,57 @@ rdseed_step:
return target;
case IX86_BUILTIN_SBB32:
- icode = CODE_FOR_subsi3_carry;
+ icode = CODE_FOR_subborrowsi;
mode0 = SImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_SBB64:
- icode = CODE_FOR_subdi3_carry;
+ icode = CODE_FOR_subborrowdi;
mode0 = DImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_ADDCARRYX32:
- icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
+ icode = CODE_FOR_addcarrysi;
mode0 = SImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_ADDCARRYX64:
- icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
+ icode = CODE_FOR_addcarrydi;
mode0 = DImode;
-addcarryx:
+ handlecarry:
arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
- op0 = gen_reg_rtx (QImode);
-
- /* Generate CF from input operand. */
op1 = expand_normal (arg0);
op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
- emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
- /* Gen ADCX instruction to compute X+Y+CF. */
op2 = expand_normal (arg1);
- op3 = expand_normal (arg2);
-
- if (!REG_P (op2))
+ if (!register_operand (op2, mode0))
op2 = copy_to_mode_reg (mode0, op2);
- if (!REG_P (op3))
- op3 = copy_to_mode_reg (mode0, op3);
-
- op0 = gen_reg_rtx (mode0);
- op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
- pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
- emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
+ op3 = expand_normal (arg2);
+ if (!register_operand (op3, mode0))
+ op3 = copy_to_mode_reg (mode0, op3);
- /* Store the result. */
op4 = expand_normal (arg3);
if (!address_operand (op4, VOIDmode))
{
op4 = convert_memory_address (Pmode, op4);
op4 = copy_addr_to_reg (op4);
}
- emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
+
+ /* Generate CF from input operand. */
+ emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
+
+ /* Generate instruction that consumes CF. */
+ op0 = gen_reg_rtx (mode0);
+
+ op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
+ pat = gen_rtx_LTU (mode0, op1, const0_rtx);
+ emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
/* Return current CF value. */
if (target == 0)
@@ -39571,6 +39568,10 @@ addcarryx:
PUT_MODE (pat, QImode);
emit_insn (gen_rtx_SET (target, pat));
+
+ /* Store the result. */
+ emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
+
return target;
case IX86_BUILTIN_READ_FLAGS:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6c2d30e507..7017913afe2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -102,7 +102,6 @@
UNSPEC_SAHF
UNSPEC_PARITY
UNSPEC_FSTCW
- UNSPEC_ADD_CARRY
UNSPEC_FLDCW
UNSPEC_REP
UNSPEC_LD_MPIC ; load_macho_picbase
@@ -848,8 +847,6 @@
(define_code_attr plusminus_mnemonic
[(plus "add") (ss_plus "adds") (us_plus "addus")
(minus "sub") (ss_minus "subs") (us_minus "subus")])
-(define_code_attr plusminus_carry_mnemonic
- [(plus "adc") (minus "sbb")])
(define_code_attr multdiv_mnemonic
[(mult "mul") (div "div")])
@@ -5317,46 +5314,21 @@
"ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
"#"
"reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_dup 1) (match_dup 2)]
- UNSPEC_ADD_CARRY))
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DWIH (match_dup 1) (match_dup 2))
+ (match_dup 1)))
(set (match_dup 0)
(plus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(plus:DWIH
- (match_dup 4)
(plus:DWIH
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
+ (match_dup 4))
+ (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
-(define_insn "*add<mode>3_cc"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC
- [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
- (plus:SWI48 (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "addqi3_cc"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC
- [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qn,qm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (plus:QI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, QImode, operands)"
- "add{b}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
(define_insn "*add<mode>_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
(plus:SWI48
@@ -6264,10 +6236,10 @@
(minus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(minus:DWIH
- (match_dup 4)
- (plus:DWIH
- (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
+ (minus:DWIH
+ (match_dup 4)
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+ (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
@@ -6431,29 +6403,17 @@
;; Add with carry and subtract with borrow
-(define_expand "<plusminus_insn><mode>3_carry"
- [(parallel
- [(set (match_operand:SWI 0 "nonimmediate_operand")
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand")
- (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator"
- [(match_operand 3 "flags_reg_operand")
- (const_int 0)])
- (match_operand:SWI 2 "<general_operand>"))))
- (clobber (reg:CC FLAGS_REG))])]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)")
-
-(define_insn "*<plusminus_insn><mode>3_carry"
+(define_insn "add<mode>3_carry"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (plus:SWI
(plus:SWI
- (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))))
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
@@ -6462,10 +6422,11 @@
(define_insn "*addsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (plus:SI (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "x86_64_general_operand" "rme")))))
+ (plus:SI
+ (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %k0|%k0, %2}"
@@ -6474,45 +6435,96 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
+;; There is no point to generate ADCX instruction. ADC is shorter and faster.
+
+(define_insn "addcarry<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm"))
+ (match_dup 1)))
+ (set (match_operand:SWI48 0 "register_operand" "=r")
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sub<mode>3_carry"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)]))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*subsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (minus:SI (match_operand:SI 1 "register_operand" "0")
- (plus:SI (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "x86_64_general_operand" "rme")))))
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operator:SI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
"sbb{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
-
-;; ADCX instruction
-(define_insn "adcx<mode>3"
+(define_insn "subborrow<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
+ (match_operand:SWI48 1 "nonimmediate_operand" "0")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0")
- (plus:SWI48
- (match_operator 4 "ix86_carry_flag_operator"
- [(match_operand 3 "flags_reg_operand") (const_int 0)])
- (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
- (const_int 0)))
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm"))))
(set (match_operand:SWI48 0 "register_operand" "=r")
- (plus:SWI48 (match_dup 1)
- (plus:SWI48 (match_op_dup 4
- [(match_dup 3) (const_int 0)])
- (match_dup 2))))]
- "TARGET_ADX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "adcx\t{%2, %0|%0, %2}"
+ (minus:SWI48 (minus:SWI48 (match_dup 1)
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 2)))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
;; Overflow setting add instructions
+(define_expand "addqi3_cconly_overflow"
+ [(parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:QI
+ (match_operand:QI 0 "nonimmediate_operand")
+ (match_operand:QI 1 "general_operand"))
+ (match_dup 0)))
+ (clobber (match_scratch:QI 2))])]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
+
(define_insn "*add<mode>3_cconly_overflow"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
@@ -8842,9 +8854,9 @@
(set (match_dup 0) (neg:DWIH (match_dup 1)))])
(parallel
[(set (match_dup 2)
- (plus:DWIH (match_dup 3)
- (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (const_int 0))))
+ (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 2)
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a9c8623ada2..bc76a5b7cee 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -912,25 +912,9 @@
/* Return true if operand is a vector constant that is all ones. */
(define_predicate "vector_all_ones_operand"
- (match_code "const_vector")
-{
- int nunits = GET_MODE_NUNITS (mode);
-
- if (GET_CODE (op) == CONST_VECTOR
- && CONST_VECTOR_NUNITS (op) == nunits)
- {
- int i;
- for (i = 0; i < nunits; ++i)
- {
- rtx x = CONST_VECTOR_ELT (op, i);
- if (x != constm1_rtx)
- return false;
- }
- return true;
- }
-
- return false;
-})
+ (and (match_code "const_vector")
+ (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
+ (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
; Return true when OP is operand acceptable for standard SSE move.
(define_predicate "vector_move_operand"
diff --git a/gcc/config/iq2000/iq2000.md b/gcc/config/iq2000/iq2000.md
index e87cb6802e3..bba67600d96 100644
--- a/gcc/config/iq2000/iq2000.md
+++ b/gcc/config/iq2000/iq2000.md
@@ -1708,7 +1708,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md
index 88d04066d16..02ad3455bd1 100644
--- a/gcc/config/m32c/blkmov.md
+++ b/gcc/config/m32c/blkmov.md
@@ -178,10 +178,10 @@
;; 3 = alignment
(define_expand "cmpstrsi"
- [(match_operand:HI 0 "" "")
- (match_operand 1 "ap_operand" "")
- (match_operand 2 "ap_operand" "")
- (match_operand 3 "" "")
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "memory_operand" "")
+ (match_operand 3 "const_int_operand" "")
]
"TARGET_A24"
"if (m32c_expand_cmpstr(operands)) DONE; FAIL;"
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 7f4195a9f2f..463c8277b43 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -6908,7 +6908,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c
index 40a659a8d7b..6e7745ad137 100644
--- a/gcc/config/microblaze/microblaze.c
+++ b/gcc/config/microblaze/microblaze.c
@@ -661,7 +661,7 @@ microblaze_classify_unspec (struct microblaze_address_info *info, rtx x)
else if (XINT (x, 1) == UNSPEC_TLS)
{
info->type = ADDRESS_TLS;
- info->tls_type = tls_reloc INTVAL(XVECEXP(x, 0, 1));
+ info->tls_type = tls_reloc (INTVAL (XVECEXP (x, 0, 1)));
}
else
{
diff --git a/gcc/config/mips/mips-opts.h b/gcc/config/mips/mips-opts.h
index 79882051595..3c2c6590e3d 100644
--- a/gcc/config/mips/mips-opts.h
+++ b/gcc/config/mips/mips-opts.h
@@ -47,4 +47,10 @@ enum mips_r10k_cache_barrier_setting {
#define MIPS_ARCH_OPTION_FROM_ABI -1
#define MIPS_ARCH_OPTION_NATIVE -2
+/* Enumerates the setting of the -mcompact-branches= option. */
+enum mips_cb_setting {
+ MIPS_CB_NEVER,
+ MIPS_CB_OPTIMAL,
+ MIPS_CB_ALWAYS
+};
#endif
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index d9ad9100f99..8a9ae0147ed 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -298,6 +298,9 @@ extern const char *mips_output_conditional_branch (rtx_insn *, rtx *,
const char *, const char *);
extern const char *mips_output_order_conditional_branch (rtx_insn *, rtx *,
bool);
+extern const char *mips_output_equal_conditional_branch (rtx_insn *, rtx *,
+ bool);
+extern const char *mips_output_jump (rtx *, int, int, bool);
extern const char *mips_output_sync (void);
extern const char *mips_output_sync_loop (rtx_insn *, rtx *);
extern unsigned int mips_sync_loop_insns (rtx_insn *, rtx *);
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 401d73bfeaa..0e0ecf232d9 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -176,7 +176,8 @@ along with GCC; see the file COPYING3. If not see
/* Return the opcode to jump to register DEST. When the JR opcode is not
available use JALR $0, DEST. */
#define MIPS_JR(DEST) \
- (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9))
+ (TARGET_CB_ALWAYS ? ((0x1b << 27) | ((DEST) << 16)) \
+ : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9)))
/* Return the opcode for:
@@ -5181,7 +5182,8 @@ mips_allocate_fcc (machine_mode mode)
conditions are:
- EQ or NE between two registers.
- - any comparison between a register and zero. */
+ - any comparison between a register and zero.
+ - if compact branches are available then any condition is valid. */
static void
mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p)
@@ -5203,6 +5205,44 @@ mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p)
else
*op1 = force_reg (GET_MODE (cmp_op0), cmp_op1);
}
+ else if (!need_eq_ne_p && TARGET_CB_MAYBE)
+ {
+ bool swap = false;
+ switch (*code)
+ {
+ case LE:
+ swap = true;
+ *code = GE;
+ break;
+ case GT:
+ swap = true;
+ *code = LT;
+ break;
+ case LEU:
+ swap = true;
+ *code = GEU;
+ break;
+ case GTU:
+ swap = true;
+ *code = LTU;
+ break;
+ case GE:
+ case LT:
+ case GEU:
+ case LTU:
+ /* Do nothing. */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1);
+ if (swap)
+ {
+ rtx tmp = *op1;
+ *op1 = *op0;
+ *op0 = tmp;
+ }
+ }
else
{
/* The comparison needs a separate scc instruction. Store the
@@ -7260,7 +7300,7 @@ mips16_build_call_stub (rtx retval, rtx *fn_ptr, rtx args_size, int fp_code)
if (fp_ret_p)
{
/* Now call the non-MIPS16 function. */
- output_asm_insn (MIPS_CALL ("jal", &fn, 0, -1), &fn);
+ output_asm_insn (mips_output_jump (&fn, 0, -1, true), &fn);
fprintf (asm_out_file, "\t.cfi_register 31,18\n");
/* Move the result from floating-point registers to
@@ -7630,12 +7670,22 @@ mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
half-word alignment, it is usually better to move in half words.
For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr
and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr.
- Otherwise move word-sized chunks. */
- if (MEM_ALIGN (src) == BITS_PER_WORD / 2
- && MEM_ALIGN (dest) == BITS_PER_WORD / 2)
- bits = BITS_PER_WORD / 2;
+ Otherwise move word-sized chunks.
+
+ For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise
+ picking the minimum of alignment or BITS_PER_WORD gets us the
+ desired size for bits. */
+
+ if (!ISA_HAS_LWL_LWR)
+ bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
else
- bits = BITS_PER_WORD;
+ {
+ if (MEM_ALIGN (src) == BITS_PER_WORD / 2
+ && MEM_ALIGN (dest) == BITS_PER_WORD / 2)
+ bits = BITS_PER_WORD / 2;
+ else
+ bits = BITS_PER_WORD;
+ }
mode = mode_for_size (bits, MODE_INT, 0);
delta = bits / BITS_PER_UNIT;
@@ -7754,8 +7804,9 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
bool
mips_expand_block_move (rtx dest, rtx src, rtx length)
{
- /* Disable entirely for R6 initially. */
- if (!ISA_HAS_LWL_LWR)
+ if (!ISA_HAS_LWL_LWR
+ && (MEM_ALIGN (src) < MIPS_MIN_MOVE_MEM_ALIGN
+ || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN))
return false;
if (CONST_INT_P (length))
@@ -8367,7 +8418,7 @@ mips_pop_asm_switch (struct mips_asm_switch *asm_switch)
'!' Print "s" to use the short version if the delay slot contains a
16-bit instruction.
- See also mips_init_print_operand_pucnt. */
+ See also mips_init_print_operand_punct. */
static void
mips_print_operand_punctuation (FILE *file, int ch)
@@ -8451,7 +8502,8 @@ mips_print_operand_punctuation (FILE *file, int ch)
case ':':
/* When final_sequence is 0, the delay slot will be a nop. We can
- use the compact version for microMIPS. */
+ use the compact version where available. The %: formatter will
+ only be present if a compact form of the branch is available. */
if (final_sequence == 0)
putc ('c', file);
break;
@@ -8459,8 +8511,9 @@ mips_print_operand_punctuation (FILE *file, int ch)
case '!':
/* If the delay slot instruction is short, then use the
compact version. */
- if (final_sequence == 0
- || get_attr_length (final_sequence->insn (1)) == 2)
+ if (TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED && mips_isa_rev <= 5
+ && (final_sequence == 0
+ || get_attr_length (final_sequence->insn (1)) == 2))
putc ('s', file);
break;
@@ -12958,6 +13011,7 @@ mips_adjust_insn_length (rtx_insn *insn, int length)
break;
case HAZARD_DELAY:
+ case HAZARD_FORBIDDEN_SLOT:
length += NOP_INSN_LENGTH;
break;
@@ -12969,6 +13023,78 @@ mips_adjust_insn_length (rtx_insn *insn, int length)
return length;
}
+/* Return the asm template for a call. OPERANDS are the operands, TARGET_OPNO
+ is the operand number of the target. SIZE_OPNO is the operand number of
+ the argument size operand that can optionally hold the call attributes. If
+ SIZE_OPNO is not -1 and the call is indirect, use the function symbol from
+ the call attributes to attach a R_MIPS_JALR relocation to the call. LINK_P
+ indicates whether the jump is a call and needs to set the link register.
+
+ When generating GOT code without explicit relocation operators, all calls
+ should use assembly macros. Otherwise, all indirect calls should use "jr"
+ or "jalr"; we will arrange to restore $gp afterwards if necessary. Finally,
+ we can only generate direct calls for -mabicalls by temporarily switching
+ to non-PIC mode.
+
+ For microMIPS jal(r), we try to generate jal(r)s when a 16-bit
+ instruction is in the delay slot of jal(r).
+
+ Where compact branches are available, we try to use them if the delay slot
+ has a NOP (or equivalently delay slots were not enabled for the instruction
+ anyway). */
+
+const char *
+mips_output_jump (rtx *operands, int target_opno, int size_opno, bool link_p)
+{
+ static char buffer[300];
+ char *s = buffer;
+ bool reg_p = REG_P (operands[target_opno]);
+
+ const char *and_link = link_p ? "al" : "";
+ const char *reg = reg_p ? "r" : "";
+ const char *compact = "";
+ const char *nop = "%/";
+ const char *short_delay = link_p ? "%!" : "";
+ const char *insn_name = TARGET_CB_NEVER || reg_p ? "j" : "b";
+
+ /* Compact branches can only be described when the ISA has support for them
+ as both the compact formatter '%:' and the delay slot NOP formatter '%/'
+ work as a mutually exclusive pair. I.e. a NOP is never required if a
+ compact form is available. */
+ if (!final_sequence
+ && (TARGET_CB_MAYBE
+ || (ISA_HAS_JRC && !link_p && reg_p)))
+ {
+ compact = "c";
+ nop = "";
+ }
+
+ if (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS)
+ sprintf (s, "%%*%s%s\t%%%d%%/", insn_name, and_link, target_opno);
+ else
+ {
+ if (!reg_p && TARGET_ABICALLS_PIC2)
+ s += sprintf (s, ".option\tpic0\n\t");
+
+ if (reg_p && mips_get_pic_call_symbol (operands, size_opno))
+ {
+ s += sprintf (s, "%%*.reloc\t1f,R_MIPS_JALR,%%%d\n1:\t", size_opno);
+ /* Not sure why this shouldn't permit a short delay but it did not
+ allow it before so we still don't allow it. */
+ short_delay = "";
+ }
+ else
+ s += sprintf (s, "%%*");
+
+ s += sprintf (s, "%s%s%s%s%s\t%%%d%s", insn_name, and_link, reg, compact, short_delay,
+ target_opno, nop);
+
+ if (!reg_p && TARGET_ABICALLS_PIC2)
+ s += sprintf (s, "\n\t.option\tpic2");
+ }
+ return buffer;
+}
+
/* Return the assembly code for INSN, which has the operands given by
OPERANDS, and which branches to OPERANDS[0] if some condition is true.
BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
@@ -13022,12 +13148,25 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
}
/* Output the unconditional branch to TAKEN. */
- if (TARGET_ABSOLUTE_JUMPS)
+ if (TARGET_ABSOLUTE_JUMPS && TARGET_CB_MAYBE)
+ {
+ /* Add a hazard nop. */
+ if (!final_sequence)
+ {
+ output_asm_insn ("nop\t\t# hazard nop", 0);
+ fprintf (asm_out_file, "\n");
+ }
+ output_asm_insn (MIPS_ABSOLUTE_JUMP ("bc\t%0"), &taken);
+ }
+ else if (TARGET_ABSOLUTE_JUMPS)
output_asm_insn (MIPS_ABSOLUTE_JUMP ("j\t%0%/"), &taken);
else
{
mips_output_load_label (taken);
- output_asm_insn ("jr\t%@%]%/", 0);
+ if (TARGET_CB_MAYBE)
+ output_asm_insn ("jrc\t%@%]", 0);
+ else
+ output_asm_insn ("jr\t%@%]%/", 0);
}
/* Now deal with its delay slot; see above. */
@@ -13041,7 +13180,7 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
asm_out_file, optimize, 1, NULL);
final_sequence->insn (1)->set_deleted ();
}
- else
+ else if (TARGET_CB_NEVER)
output_asm_insn ("nop", 0);
fprintf (asm_out_file, "\n");
}
@@ -13053,42 +13192,155 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
}
/* Return the assembly code for INSN, which branches to OPERANDS[0]
+ if some equality condition is true. The condition is given by
+ OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
+ OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
+ OPERANDS[3] is the second operand and may be zero or a register. */
+
+const char *
+mips_output_equal_conditional_branch (rtx_insn* insn, rtx *operands,
+ bool inverted_p)
+{
+ const char *branch[2];
+ /* For a simple BNEZ or BEQZ microMIPSr3 branch. */
+ if (TARGET_MICROMIPS
+ && mips_isa_rev <= 5
+ && operands[3] == const0_rtx
+ && get_attr_length (insn) <= 8)
+ {
+ if (mips_cb == MIPS_CB_OPTIMAL)
+ {
+ branch[!inverted_p] = "%*b%C1z%:\t%2,%0";
+ branch[inverted_p] = "%*b%N1z%:\t%2,%0";
+ }
+ else
+ {
+ branch[!inverted_p] = "%*b%C1z\t%2,%0%/";
+ branch[inverted_p] = "%*b%N1z\t%2,%0%/";
+ }
+ }
+ else if (TARGET_CB_MAYBE)
+ {
+ if (operands[3] == const0_rtx)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0");
+ }
+ else if (REGNO (operands[2]) != REGNO (operands[3]))
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0");
+ }
+ else
+ {
+ /* This case is degenerate. It should not happen, but does. */
+ if (GET_CODE (operands[1]) == NE)
+ inverted_p = !inverted_p;
+
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ }
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("b%C1", "%2,%z3,%0");
+ branch[inverted_p] = MIPS_BRANCH ("b%N1", "%2,%z3,%0");
+ }
+
+ return mips_output_conditional_branch (insn, operands, branch[1], branch[0]);
+}
+
+/* Return the assembly code for INSN, which branches to OPERANDS[0]
if some ordering condition is true. The condition is given by
OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
- its second is always zero. */
+ OPERANDS[3] is the second operand and may be zero or a register. */
const char *
-mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands, bool inverted_p)
+mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
+ bool inverted_p)
{
const char *branch[2];
/* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
Make BRANCH[0] branch on the inverse condition. */
- switch (GET_CODE (operands[1]))
+ if (operands[3] != const0_rtx)
{
- /* These cases are equivalent to comparisons against zero. */
- case LEU:
- inverted_p = !inverted_p;
- /* Fall through. */
- case GTU:
- branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0");
- branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0");
- break;
+ /* Handle degenerate cases that should not, but do, occur. */
+ if (REGNO (operands[2]) == REGNO (operands[3]))
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ case LT:
+ case LTU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GE:
+ case GEU:
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0");
+ }
+ }
+ else
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ /* These cases are equivalent to comparisons against zero. */
+ case LEU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GTU:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("bnez", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("beqz", "%2,%0");
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0");
+ }
+ break;
- /* These cases are always true or always false. */
- case LTU:
- inverted_p = !inverted_p;
- /* Fall through. */
- case GEU:
- branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0");
- branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0");
- break;
+ /* These cases are always true or always false. */
+ case LTU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GEU:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0");
+ }
+ break;
- default:
- branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0");
- branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0");
- break;
+ default:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0");
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0");
+ }
+ break;
+ }
}
return mips_output_conditional_branch (insn, operands, branch[1], branch[0]);
}
@@ -13291,11 +13543,18 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands)
at, oldval, inclusive_mask, NULL);
tmp1 = at;
}
- mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
+ if (TARGET_CB_NEVER)
+ mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
/* CMP = 0 [delay slot]. */
if (cmp)
mips_multi_add_insn ("li\t%0,0", cmp, NULL);
+
+ if (TARGET_CB_MAYBE && required_oldval == const0_rtx)
+ mips_multi_add_insn ("bnezc\t%0,2f", tmp1, NULL);
+ else if (TARGET_CB_MAYBE)
+ mips_multi_add_insn ("bnec\t%0,%1,2f", tmp1, required_oldval, NULL);
+
}
/* $TMP1 = OLDVAL & EXCLUSIVE_MASK. */
@@ -13358,7 +13617,10 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands)
be annulled. To ensure this behaviour unconditionally use a NOP
in the delay slot for the branch likely case. */
- mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL);
+ if (TARGET_CB_MAYBE)
+ mips_multi_add_insn ("beqzc\t%0,1b", at, NULL);
+ else
+ mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL);
/* if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot]. */
if (insn1 != SYNC_INSN1_MOVE && insn1 != SYNC_INSN1_LI && tmp3 != newval)
@@ -16640,7 +16902,7 @@ mips_orphaned_high_part_p (mips_offset_table *htab, rtx_insn *insn)
static void
mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
- rtx *delayed_reg, rtx lo_reg)
+ rtx *delayed_reg, rtx lo_reg, bool *fs_delay)
{
rtx pattern, set;
int nops, ninsns;
@@ -16666,6 +16928,15 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
nops = 2 - *hilo_delay;
else if (*delayed_reg != 0 && reg_referenced_p (*delayed_reg, pattern))
nops = 1;
+ /* If processing a forbidden slot hazard then a NOP is required if the
+ branch instruction was not in a sequence (as the sequence would
+ imply it is not actually a compact branch anyway) and the current
+ insn is not an inline asm, and can't go in a delay slot. */
+ else if (*fs_delay && get_attr_can_delay (insn) == CAN_DELAY_NO
+ && GET_CODE (PATTERN (after)) != SEQUENCE
+ && GET_CODE (pattern) != ASM_INPUT
+ && asm_noperands (pattern) < 0)
+ nops = 1;
else
nops = 0;
@@ -16678,12 +16949,18 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
/* Set up the state for the next instruction. */
*hilo_delay += ninsns;
*delayed_reg = 0;
+ *fs_delay = false;
if (INSN_CODE (insn) >= 0)
switch (get_attr_hazard (insn))
{
case HAZARD_NONE:
break;
+ case HAZARD_FORBIDDEN_SLOT:
+ if (TARGET_CB_MAYBE)
+ *fs_delay = true;
+ break;
+
case HAZARD_HILO:
*hilo_delay = 0;
break;
@@ -16707,6 +16984,7 @@ mips_reorg_process_insns (void)
rtx_insn *insn, *last_insn, *subinsn, *next_insn;
rtx lo_reg, delayed_reg;
int hilo_delay;
+ bool fs_delay;
/* Force all instructions to be split into their final form. */
split_all_insns_noflow ();
@@ -16775,6 +17053,7 @@ mips_reorg_process_insns (void)
hilo_delay = 2;
delayed_reg = 0;
lo_reg = gen_rtx_REG (SImode, LO_REGNUM);
+ fs_delay = false;
/* Make a second pass over the instructions. Delete orphaned
high-part relocations or turn them into NOPs. Avoid hazards
@@ -16798,7 +17077,7 @@ mips_reorg_process_insns (void)
INSN_CODE (subinsn) = CODE_FOR_nop;
}
mips_avoid_hazard (last_insn, subinsn, &hilo_delay,
- &delayed_reg, lo_reg);
+ &delayed_reg, lo_reg, &fs_delay);
}
last_insn = insn;
}
@@ -16819,7 +17098,7 @@ mips_reorg_process_insns (void)
else
{
mips_avoid_hazard (last_insn, insn, &hilo_delay,
- &delayed_reg, lo_reg);
+ &delayed_reg, lo_reg, &fs_delay);
last_insn = insn;
}
}
@@ -17684,6 +17963,27 @@ mips_option_override (void)
target_flags |= MASK_ODD_SPREG;
}
+ if (!ISA_HAS_COMPACT_BRANCHES && mips_cb == MIPS_CB_ALWAYS)
+ {
+ error ("unsupported combination: %qs%s %s",
+ mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
+ "-mcompact-branches=always");
+ }
+ else if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER)
+ {
+ error ("unsupported combination: %qs%s %s",
+ mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
+ "-mcompact-branches=never");
+ }
+
+ /* Require explicit relocs for MIPS R6 onwards. This enables simplification
+ of the compact branch and jump support through the backend. */
+ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
+ {
+ error ("unsupported combination: %qs %s",
+ mips_arch_info->name, "-mno-explicit-relocs");
+ }
+
/* The effect of -mabicalls isn't defined for the EABI. */
if (mips_abi == ABI_EABI && TARGET_ABICALLS)
{
@@ -18703,6 +19003,18 @@ mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
#undef OP
+ /* If we are using compact branches we don't have delay slots so
+ place the instruction that was in the delay slot before the JRC
+ instruction. */
+
+ if (TARGET_CB_ALWAYS)
+ {
+ rtx temp;
+ temp = trampoline[i-2];
+ trampoline[i-2] = trampoline[i-1];
+ trampoline[i-1] = temp;
+ }
+
/* Copy the trampoline code. Leave any padding uninitialized. */
for (j = 0; j < i; j++)
{
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index da1de011fc9..25a1e0622cd 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -92,6 +92,33 @@ struct mips_cpu_info {
/* True if we are generating position-independent VxWorks RTP code. */
#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic)
+/* Compact branches must not be used if the user either selects the
+ 'never' policy or the 'optimal' policy on a core that lacks
+ compact branch instructions. */
+#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && !ISA_HAS_COMPACT_BRANCHES))
+
+/* Compact branches may be used if the user either selects the
+ 'always' policy or the 'optimal' policy on a core that supports
+ compact branch instructions. */
+#define TARGET_CB_MAYBE (TARGET_CB_ALWAYS \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && ISA_HAS_COMPACT_BRANCHES))
+
+/* Compact branches must always be generated if the user selects
+ the 'always' policy or the 'optimal' policy on a core that
+ lacks delay slot branch instructions. */
+#define TARGET_CB_ALWAYS (mips_cb == MIPS_CB_ALWAYS \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && !ISA_HAS_DELAY_SLOTS))
+
+/* Special handling for JRC that exists in microMIPSR3 as well as R6
+ ISAs with full compact branch support. */
+#define ISA_HAS_JRC ((ISA_HAS_COMPACT_BRANCHES \
+ || TARGET_MICROMIPS) \
+ && mips_cb != MIPS_CB_NEVER)
+
/* True if the output file is marked as ".abicalls; .option pic0"
(-call_nonpic). */
#define TARGET_ABICALLS_PIC0 \
@@ -872,6 +899,10 @@ struct mips_cpu_info {
#define ISA_HAS_JR (mips_isa_rev <= 5)
+#define ISA_HAS_DELAY_SLOTS 1
+
+#define ISA_HAS_COMPACT_BRANCHES (mips_isa_rev >= 6)
+
/* ISA has branch likely instructions (e.g. mips2). */
/* Disable branchlikely for tx39 until compare rewrite. They haven't
been generated up to this point. */
@@ -2645,6 +2676,9 @@ typedef struct mips_args {
#define MIPS_BRANCH(OPCODE, OPERANDS) \
"%*" OPCODE "%?\t" OPERANDS "%/"
+#define MIPS_BRANCH_C(OPCODE, OPERANDS) \
+ "%*" OPCODE "%:\t" OPERANDS
+
/* Return an asm string that forces INSN to be treated as an absolute
J or JAL instruction instead of an assembler macro. */
#define MIPS_ABSOLUTE_JUMP(INSN) \
@@ -2652,45 +2686,6 @@ typedef struct mips_args {
? ".option\tpic0\n\t" INSN "\n\t.option\tpic2" \
: INSN)
-/* Return the asm template for a call. INSN is the instruction's mnemonic
- ("j" or "jal"), OPERANDS are its operands, TARGET_OPNO is the operand
- number of the target. SIZE_OPNO is the operand number of the argument size
- operand that can optionally hold the call attributes. If SIZE_OPNO is not
- -1 and the call is indirect, use the function symbol from the call
- attributes to attach a R_MIPS_JALR relocation to the call.
-
- When generating GOT code without explicit relocation operators,
- all calls should use assembly macros. Otherwise, all indirect
- calls should use "jr" or "jalr"; we will arrange to restore $gp
- afterwards if necessary. Finally, we can only generate direct
- calls for -mabicalls by temporarily switching to non-PIC mode.
-
- For microMIPS jal(r), we try to generate jal(r)s when a 16-bit
- instruction is in the delay slot of jal(r). */
-#define MIPS_CALL(INSN, OPERANDS, TARGET_OPNO, SIZE_OPNO) \
- (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \
- ? "%*" INSN "\t%" #TARGET_OPNO "%/" \
- : REG_P (OPERANDS[TARGET_OPNO]) \
- ? (mips_get_pic_call_symbol (OPERANDS, SIZE_OPNO) \
- ? ("%*.reloc\t1f,R_MIPS_JALR,%" #SIZE_OPNO "\n" \
- "1:\t" INSN "r\t%" #TARGET_OPNO "%/") \
- : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \
- ? "%*" INSN "r%!\t%" #TARGET_OPNO "%/" \
- : "%*" INSN "r\t%" #TARGET_OPNO "%/") \
- : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \
- ? MIPS_ABSOLUTE_JUMP ("%*" INSN "%!\t%" #TARGET_OPNO "%/") \
- : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #TARGET_OPNO "%/")) \
-
-/* Similar to MIPS_CALL, but this is for MICROMIPS "j" to generate
- "jrc" when nop is in the delay slot of "jr". */
-
-#define MICROMIPS_J(INSN, OPERANDS, OPNO) \
- (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \
- ? "%*j\t%" #OPNO "%/" \
- : REG_P (OPERANDS[OPNO]) \
- ? "%*jr%:\t%" #OPNO \
- : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #OPNO "%/"))
-
/* Control the assembler format that we output. */
@@ -2981,6 +2976,9 @@ while (0)
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+/* The minimum alignment of any expanded block move. */
+#define MIPS_MIN_MOVE_MEM_ALIGN 16
+
/* The maximum number of bytes that can be copied by one iteration of
a movmemsi loop; see mips_block_move_loop. */
#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index a0079d5c974..1d1c42bf5f5 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -409,6 +409,15 @@
(eq_attr "sync_mem" "!none") (const_string "syncloop")]
(const_string "unknown")))
+(define_attr "compact_form" "always,maybe,never"
+ (cond [(eq_attr "jal" "direct")
+ (const_string "always")
+ (eq_attr "jal" "indirect")
+ (const_string "maybe")
+ (eq_attr "type" "jump")
+ (const_string "maybe")]
+ (const_string "never")))
+
;; Mode for conversion types (fcvt)
;; I2S integer to float single (SI/DI to SF)
;; I2D integer to float double (SI/DI to DF)
@@ -694,7 +703,7 @@
;; DELAY means that the next instruction cannot read the result
;; of this one. HILO means that the next two instructions cannot
;; write to HI or LO.
-(define_attr "hazard" "none,delay,hilo"
+(define_attr "hazard" "none,delay,hilo,forbidden_slot"
(cond [(and (eq_attr "type" "load,fpload,fpidxload")
(match_test "ISA_HAS_LOAD_DELAY"))
(const_string "delay")
@@ -1045,21 +1054,37 @@
(nil)
(eq_attr "can_delay" "yes")])
-;; Branches that don't have likely variants do not annul on false.
+;; Branches that have delay slots and don't have likely variants do
+;; not annul on false.
(define_delay (and (eq_attr "type" "branch")
(not (match_test "TARGET_MIPS16"))
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never"))
(eq_attr "branch_likely" "no"))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
-(define_delay (eq_attr "type" "jump")
+(define_delay (and (eq_attr "type" "jump")
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never")))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
+;; Call type instructions should never have a compact form as the
+;; type is only used for MIPS16 patterns. For safety put the compact
+;; branch detection condition in anyway.
(define_delay (and (eq_attr "type" "call")
- (eq_attr "jal_macro" "no"))
+ (eq_attr "jal_macro" "no")
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never")))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
@@ -5813,25 +5838,29 @@
[(set (pc)
(if_then_else
(match_operator 1 "order_operator"
- [(match_operand:GPR 2 "register_operand" "d")
- (const_int 0)])
+ [(match_operand:GPR 2 "register_operand" "d,d")
+ (match_operand:GPR 3 "reg_or_0_operand" "J,d")])
(label_ref (match_operand 0 "" ""))
(pc)))]
"!TARGET_MIPS16"
{ return mips_output_order_conditional_branch (insn, operands, false); }
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe,always")
+ (set_attr "hazard" "forbidden_slot")])
(define_insn "*branch_order<mode>_inverted"
[(set (pc)
(if_then_else
(match_operator 1 "order_operator"
- [(match_operand:GPR 2 "register_operand" "d")
- (const_int 0)])
+ [(match_operand:GPR 2 "register_operand" "d,d")
+ (match_operand:GPR 3 "reg_or_0_operand" "J,d")])
(pc)
(label_ref (match_operand 0 "" ""))))]
"!TARGET_MIPS16"
{ return mips_output_order_conditional_branch (insn, operands, true); }
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe,always")
+ (set_attr "hazard" "forbidden_slot")])
;; Conditional branch on equality comparison.
@@ -5844,20 +5873,10 @@
(label_ref (match_operand 0 "" ""))
(pc)))]
"!TARGET_MIPS16"
-{
- /* For a simple BNEZ or BEQZ microMIPS branch. */
- if (TARGET_MICROMIPS
- && operands[3] == const0_rtx
- && get_attr_length (insn) <= 8)
- return mips_output_conditional_branch (insn, operands,
- "%*b%C1z%:\t%2,%0",
- "%*b%N1z%:\t%2,%0");
-
- return mips_output_conditional_branch (insn, operands,
- MIPS_BRANCH ("b%C1", "%2,%z3,%0"),
- MIPS_BRANCH ("b%N1", "%2,%z3,%0"));
-}
- [(set_attr "type" "branch")])
+ { return mips_output_equal_conditional_branch (insn, operands, false); }
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")
+ (set_attr "hazard" "forbidden_slot")])
(define_insn "*branch_equality<mode>_inverted"
[(set (pc)
@@ -5868,20 +5887,10 @@
(pc)
(label_ref (match_operand 0 "" ""))))]
"!TARGET_MIPS16"
-{
- /* For a simple BNEZ or BEQZ microMIPS branch. */
- if (TARGET_MICROMIPS
- && operands[3] == const0_rtx
- && get_attr_length (insn) <= 8)
- return mips_output_conditional_branch (insn, operands,
- "%*b%N0z%:\t%2,%1",
- "%*b%C0z%:\t%2,%1");
-
- return mips_output_conditional_branch (insn, operands,
- MIPS_BRANCH ("b%N1", "%2,%z3,%0"),
- MIPS_BRANCH ("b%C1", "%2,%z3,%0"));
-}
- [(set_attr "type" "branch")])
+ { return mips_output_equal_conditional_branch (insn, operands, true); }
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")
+ (set_attr "hazard" "forbidden_slot")])
;; MIPS16 branches
@@ -6176,11 +6185,22 @@
"!TARGET_MIPS16 && TARGET_ABSOLUTE_JUMPS"
{
if (get_attr_length (insn) <= 8)
- return "%*b\t%l0%/";
+ {
+ if (TARGET_CB_MAYBE)
+ return MIPS_ABSOLUTE_JUMP ("%*b%:\t%l0");
+ else
+ return MIPS_ABSOLUTE_JUMP ("%*b\t%l0%/");
+ }
else
- return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/");
+ {
+ if (TARGET_CB_MAYBE && !final_sequence)
+ return MIPS_ABSOLUTE_JUMP ("%*bc\t%l0");
+ else
+ return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/");
+ }
}
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")])
(define_insn "*jump_pic"
[(set (pc)
@@ -6188,14 +6208,23 @@
"!TARGET_MIPS16 && !TARGET_ABSOLUTE_JUMPS"
{
if (get_attr_length (insn) <= 8)
- return "%*b\t%l0%/";
+ {
+ if (TARGET_CB_MAYBE)
+ return "%*b%:\t%l0";
+ else
+ return "%*b\t%l0%/";
+ }
else
{
mips_output_load_label (operands[0]);
- return "%*jr\t%@%/%]";
+ if (TARGET_CB_MAYBE)
+ return "%*jr%:\t%@%]";
+ else
+ return "%*jr\t%@%/%]";
}
}
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")])
;; We need a different insn for the mips16, because a mips16 branch
;; does not have a delay slot.
@@ -6242,12 +6271,9 @@
(define_insn "indirect_jump_<mode>"
[(set (pc) (match_operand:P 0 "register_operand" "d"))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6291,12 +6317,9 @@
(match_operand:P 0 "register_operand" "d"))
(use (label_ref (match_operand 1 "" "")))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6508,10 +6531,8 @@
[(any_return)]
""
{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t$31";
- else
- return "%*j\t$31%/";
+ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ return mips_output_jump (operands, 0, -1, false);
}
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6522,12 +6543,10 @@
[(any_return)
(use (match_operand 0 "pmode_register_operand" ""))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6783,12 +6802,7 @@
[(call (mem:SI (match_operand 0 "call_insn_operand" "j,S"))
(match_operand 1 "" ""))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 0);
- else
- return MIPS_CALL ("j", operands, 0, 1);
-}
+ { return mips_output_jump (operands, 0, 1, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6809,12 +6823,7 @@
(call (mem:SI (match_operand 1 "call_insn_operand" "j,S"))
(match_operand 2 "" "")))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 1);
- else
- return MIPS_CALL ("j", operands, 1, 2);
-}
+ { return mips_output_jump (operands, 1, 2, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6826,12 +6835,7 @@
(call (mem:SI (match_dup 1))
(match_dup 2)))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 1);
- else
- return MIPS_CALL ("j", operands, 1, 2);
-}
+ { return mips_output_jump (operands, 1, 2, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6887,7 +6891,10 @@
(match_operand 1 "" ""))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, 1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 0, 1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6902,7 +6909,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 0, 1); }
+ { return mips_output_jump (operands, 0, 1, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6916,7 +6923,10 @@
(const_int 1)
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, -1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 0, -1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6933,7 +6943,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "jal" "direct")
(set_attr "jal_macro" "no")])
@@ -6956,7 +6966,10 @@
(match_operand 2 "" "")))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, 2, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6974,7 +6987,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, 2); }
+ { return mips_output_jump (operands, 1, 2, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6986,7 +6999,10 @@
(const_int 1)
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, -1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, -1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -7005,7 +7021,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, -1); }
+ { return mips_output_jump (operands, 1, -1, true); }
[(set_attr "jal" "direct")
(set_attr "jal_macro" "no")])
@@ -7019,7 +7035,10 @@
(match_dup 2)))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, 2, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -7040,7 +7059,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, 2); }
+ { return mips_output_jump (operands, 1, 2, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -7055,7 +7074,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -7411,7 +7430,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"HAVE_AS_TLS && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")
(set_attr "mode" "<MODE>")])
@@ -7452,7 +7471,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"TARGET_HARD_FLOAT_ABI && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")])
@@ -7482,7 +7501,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"TARGET_HARD_FLOAT_ABI && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")])
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 348c6e03f1e..84887d11623 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -418,3 +418,20 @@ Driver
mload-store-pairs
Target Report Var(TARGET_LOAD_STORE_PAIRS) Init(1)
Enable load/store bonding.
+
+mcompact-branches=
+Target RejectNegative JoinedOrMissing Var(mips_cb) Report Enum(mips_cb_setting) Init(MIPS_CB_OPTIMAL)
+Specify the compact branch usage policy
+
+Enum
+Name(mips_cb_setting) Type(enum mips_cb_setting)
+Policies available for use with -mcompact-branches=:
+
+EnumValue
+Enum(mips_cb_setting) String(never) Value(MIPS_CB_NEVER)
+
+EnumValue
+Enum(mips_cb_setting) String(optimal) Value(MIPS_CB_OPTIMAL)
+
+EnumValue
+Enum(mips_cb_setting) String(always) Value(MIPS_CB_ALWAYS)
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index 4929c3dc27e..3259232bb89 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -475,7 +475,18 @@
(match_code "eq,ne,lt,ltu,ge,geu"))
(define_predicate "order_operator"
- (match_code "lt,ltu,le,leu,ge,geu,gt,gtu"))
+ (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")
+{
+ if (XEXP (op, 1) == const0_rtx)
+ return true;
+
+ if (TARGET_CB_MAYBE
+ && (GET_CODE (op) == LT || GET_CODE (op) == LTU
+ || GET_CODE (op) == GE || GET_CODE (op) == GEU))
+ return true;
+
+ return false;
+})
;; For NE, cstore uses sltu instructions in which the first operand is $0.
;; This isn't possible in mips16 code.
diff --git a/gcc/config/msp430/msp430.opt b/gcc/config/msp430/msp430.opt
index 3fed8799822..e055f61069d 100644
--- a/gcc/config/msp430/msp430.opt
+++ b/gcc/config/msp430/msp430.opt
@@ -12,7 +12,7 @@ Specify the MCU to build for.
mcpu=
Target Report Joined RejectNegative Var(target_cpu)
-Specify the ISA to build for: msp430, mdsp430x, msp430xv2
+Specify the ISA to build for: msp430, msp430x, msp430xv2
mlarge
Target Report Mask(LARGE) RejectNegative
diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c
index 1e154c8412c..ba0454e537a 100644
--- a/gcc/config/nvptx/mkoffload.c
+++ b/gcc/config/nvptx/mkoffload.c
@@ -881,10 +881,10 @@ process (FILE *in, FILE *out)
"extern \"C\" {\n"
"#endif\n");
- fprintf (out, "extern void GOMP_offload_register"
- " (const void *, int, const void *);\n");
- fprintf (out, "extern void GOMP_offload_unregister"
- " (const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_register_ver"
+ " (unsigned, const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_unregister_ver"
+ " (unsigned, const void *, int, const void *);\n");
fprintf (out, "#ifdef __cplusplus\n"
"}\n"
@@ -894,15 +894,19 @@ process (FILE *in, FILE *out)
fprintf (out, "static __attribute__((constructor)) void init (void)\n"
"{\n"
- " GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
"{\n"
- " GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
}
static void
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index a3569670d62..e6853680078 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1,4 +1,3 @@
-
/* Target code for NVPTX.
Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Bernd Schmidt <bernds@codesourcery.com>
@@ -322,7 +321,8 @@ nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree de
/* Declare argument types. */
if ((args != NULL_TREE
- && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node))
+ && !(TREE_CODE (args) == TREE_LIST
+ && TREE_VALUE (args) == void_type_node))
|| is_main
|| return_in_mem
|| DECL_STATIC_CHAIN (decl))
@@ -406,8 +406,8 @@ walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy,
mode = DFmode;
}
- mode = arg_promotion (mode);
}
+ mode = arg_promotion (mode);
while (count-- > 0)
{
i++;
@@ -546,7 +546,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
else if (TYPE_MODE (result_type) != VOIDmode)
{
machine_mode mode = arg_promotion (TYPE_MODE (result_type));
- fprintf (file, ".reg%s %%retval;\n",
+ fprintf (file, "\t.reg%s %%retval;\n",
nvptx_ptx_type_from_mode (mode, false));
}
@@ -598,9 +598,11 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
sz = get_frame_size ();
if (sz > 0 || cfun->machine->has_call_with_sc)
{
+ int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
+
fprintf (file, "\t.reg.u%d %%frame;\n"
- "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
- BITS_PER_WORD, sz == 0 ? 1 : sz);
+ "\t.local.align %d .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
+ BITS_PER_WORD, alignment, sz == 0 ? 1 : sz);
fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n",
BITS_PER_WORD);
}
@@ -616,10 +618,10 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
true, return_in_mem);
if (return_in_mem)
- fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n",
+ fprintf (file, "\tld.param.u%d %%ar1, [%%in_ar1];\n",
GET_MODE_BITSIZE (Pmode));
if (stdarg_p (fntype))
- fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n",
+ fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n",
GET_MODE_BITSIZE (Pmode));
}
@@ -726,6 +728,14 @@ nvptx_function_ok_for_sibcall (tree, tree)
return false;
}
+/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */
+
+static rtx
+nvptx_get_drap_rtx (void)
+{
+ return NULL_RTX;
+}
+
/* Implement the TARGET_CALL_ARGS hook. Record information about one
argument to the next call. */
@@ -1908,7 +1918,7 @@ nvptx_reorg_subreg (void)
{
next = NEXT_INSN (insn);
if (!NONDEBUG_INSN_P (insn)
- || asm_noperands (insn) >= 0
+ || asm_noperands (PATTERN (insn)) >= 0
|| GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)
continue;
@@ -2118,6 +2128,8 @@ nvptx_file_end (void)
#define TARGET_LIBCALL_VALUE nvptx_libcall_value
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
+#undef TARGET_GET_DRAP_RTX
+#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_RETURN_IN_MEMORY
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index afe4fcdd361..60a922af93f 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -52,6 +52,8 @@
#define BIGGEST_ALIGNMENT 64
#define STRICT_ALIGNMENT 1
+#define MAX_STACK_ALIGNMENT (1024 * 8)
+
/* Copied from elf.h and other places. We'd otherwise use
BIGGEST_ALIGNMENT and fail a number of testcases. */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 7c2cb9c15bc..b857e53bb22 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1241,6 +1241,12 @@
(match_operand 1 "nvptx_register_operand")]
""
{
+ /* The ptx documentation specifies an alloca intrinsic (for 32 bit
+ only) but notes it is not implemented. The assembler emits a
+ confused error message. Issue a blunt one now instead. */
+ sorry ("target cannot support alloca");
+ emit_insn (gen_nop ());
+ DONE;
if (TARGET_ABI64)
emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
else
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index ad9289236ff..46fc0f5719c 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -8248,7 +8248,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 3ef6bc85ecd..1c00099c78d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -208,6 +208,8 @@
#define vec_lvebx __builtin_vec_lvebx
#define vec_lvehx __builtin_vec_lvehx
#define vec_lvewx __builtin_vec_lvewx
+#define vec_pmsum_be __builtin_vec_vpmsum
+#define vec_shasigma_be __builtin_crypto_vshasigma
/* Cell only intrinsics. */
#ifdef __PPU__
#define vec_lvlx __builtin_vec_lvlx
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ae74796849d..3edb4774e75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -239,6 +239,25 @@
return INT_REGNO_P (REGNO (op));
})
+;; Like int_reg_operand, but don't return true for pseudo registers
+(define_predicate "int_reg_operand_not_pseudo"
+ (match_operand 0 "register_operand")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ return 0;
+
+ return INT_REGNO_P (REGNO (op));
+})
+
;; Like int_reg_operand, but only return true for base registers
(define_predicate "base_reg_operand"
(match_operand 0 "int_reg_operand")
@@ -883,12 +902,12 @@
(define_predicate "current_file_function_operand"
(and (match_code "symbol_ref")
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
- && ((SYMBOL_REF_LOCAL_P (op)
- && ((DEFAULT_ABI != ABI_AIX
- && DEFAULT_ABI != ABI_ELFv2)
- || !SYMBOL_REF_EXTERNAL_P (op)))
- || (op == XEXP (DECL_RTL (current_function_decl),
- 0)))")))
+ && (SYMBOL_REF_LOCAL_P (op)
+ || op == XEXP (DECL_RTL (current_function_decl), 0))
+ && !((DEFAULT_ABI == ABI_AIX
+ || DEFAULT_ABI == ABI_ELFv2)
+ && (SYMBOL_REF_EXTERNAL_P (op)
+ || SYMBOL_REF_WEAK (op)))")))
;; Return 1 if this operand is a valid input for a move insn.
(define_predicate "input_operand"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 7beddf64d1b..85082ec0ee2 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus)
+BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
+BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
+BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
+BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
@@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
+BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum")
BU_P8V_OVERLOAD_2 (VRLD, "vrld")
BU_P8V_OVERLOAD_2 (VSLD, "vsld")
BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d45bc93b10a..5fc2b53adfe 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
{ ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
{ ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
@@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 9fd565286f2..03764aef740 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -53,6 +53,7 @@
| OPTION_MASK_P8_VECTOR \
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_HTM \
| OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_QUAD_MEMORY_ATOMIC \
@@ -78,6 +79,7 @@
| OPTION_MASK_DFP \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_DLMZB \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_FPRND \
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 7262a151438..7be529fab49 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -54,6 +54,7 @@ extern const char *output_vec_const_move (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
extern bool rs6000_move_128bit_ok_p (rtx []);
extern bool rs6000_split_128bit_ok_p (rtx []);
+extern void rs6000_expand_float128_convert (rtx, rtx, bool);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2a969782f26..8107bec8e6e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
error ("-mfloat128-software requires VSX support");
+ /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
+ support. If we only have ISA 2.06 support, and the user did not specify
+ the switch, leave it set to -1 so the movmisalign patterns are enabled,
+ but we don't enable the full vectorization support */
+ if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
+ TARGET_ALLOW_MOVMISALIGN = 1;
+
+ else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
+ {
+ if (TARGET_ALLOW_MOVMISALIGN > 0)
+ error ("-mallow-movmisalign requires -mvsx");
+
+ TARGET_ALLOW_MOVMISALIGN = 0;
+ }
+
+ /* Determine when unaligned vector accesses are permitted, and when
+ they are preferred over masked Altivec loads. Note that if
+ TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
+ TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
+ not true. */
+ if (TARGET_EFFICIENT_UNALIGNED_VSX)
+ {
+ if (!TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mvsx");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+
+ else if (!TARGET_ALLOW_MOVMISALIGN)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p)
}
}
- /* Determine when unaligned vector accesses are permitted, and when
- they are preferred over masked Altivec loads. Note that if
- TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
- TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
- not true. */
- if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
- if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
- && TARGET_ALLOW_MOVMISALIGN != 0)
- TARGET_EFFICIENT_UNALIGNED_VSX = 1;
- else
- TARGET_EFFICIENT_UNALIGNED_VSX = 0;
- }
-
- if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
- TARGET_ALLOW_MOVMISALIGN = 1;
-
/* Set the builtin mask of the various options used that could affect which
builtins were used. In the past we used target_flags, but we've run out
of bits, and some options like SPE and PAIRED are no longer in
@@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
during expand. */
gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
- /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
{
@@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
{
unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+ bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
+ bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
/* Don't allow 64-bit types to overlap with 128-bit types that take a
single register under VSX because the scalar part of the register
@@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from,
IEEE floating point can't overlap, and neither can small
values. */
- if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ if (to_float128_vector_p && from_float128_vector_p)
+ return false;
+
+ else if (to_float128_vector_p || from_float128_vector_p)
return true;
/* TDmode in floating-mode registers must always go into a register
@@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
if (TARGET_E500_DOUBLE
&& ((((to) == DFmode) + ((from) == DFmode)) == 1
|| (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == IFmode) + ((from) == IFmode)) == 1
+ || (((to) == KFmode) + ((from) == KFmode)) == 1
|| (((to) == DDmode) + ((from) == DDmode)) == 1
|| (((to) == TDmode) + ((from) == TDmode)) == 1
|| (((to) == DImode) + ((from) == DImode)) == 1))
@@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[])
return output_vec_const_move (operands);
}
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\n===== Bad 128 bit move:\n");
- debug_rtx (gen_rtx_SET (dest, src));
- }
-
- gcc_unreachable ();
+ fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move. */
@@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result, op0, op1)
: gen_cmptfeq_gpr (compare_result, op0, op1);
@@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfgt_gpr (compare_result, op0, op1)
: gen_cmptfgt_gpr (compare_result, op0, op1);
@@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttflt_gpr (compare_result, op0, op1)
: gen_cmptflt_gpr (compare_result, op0, op1);
@@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result2, op0, op1)
: gen_cmptfeq_gpr (compare_result2, op0, op1);
@@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
emit_insn (cmp);
}
+
+ /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2)
+ returns 0..15 that is laid out the same way as the PowerPC CR register
+ would for a normal floating point comparison. */
+ else if (FLOAT128_IEEE_P (mode))
+ {
+ rtx and_reg = gen_reg_rtx (SImode);
+ rtx dest = gen_reg_rtx (SImode);
+ rtx libfunc = optab_libfunc (cmp_optab, mode);
+ HOST_WIDE_INT mask_value = 0;
+
+ /* Values that __cmpkf2 returns. */
+#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
+#define PPC_CMP_EQUAL 0x2 /* a == b. */
+#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
+#define PPC_CMP_LESS_THEN 0x8 /* a < b. */
+
+ switch (code)
+ {
+ case EQ:
+ mask_value = PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case NE:
+ mask_value = PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case GT:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = NE;
+ break;
+
+ case GE:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case LT:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = NE;
+ break;
+
+ case LE:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case UNLE:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = EQ;
+ break;
+
+ case UNLT:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNGE:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = EQ;
+ break;
+
+ case UNGT:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNEQ:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case LTGT:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ case UNORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case ORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (mask_value != 0);
+ and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
+ op0, mode, op1, mode);
+
+ emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
+ compare_result = gen_reg_rtx (CCmode);
+ comp_mode = CCmode;
+
+ emit_insn (gen_rtx_SET (compare_result,
+ gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
+ }
+
else
{
/* Generate XLC-compatible TFmode compare as PARALLEL with extra
CLOBBERs to match cmptf_internal2 pattern. */
if (comp_mode == CCFPmode && TARGET_XL_COMPAT
- && GET_MODE (op0) == TFmode
- && !TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+ && FLOAT128_IBM_P (GET_MODE (op0))
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
emit_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (10,
gen_rtx_SET (compare_result,
@@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
/* Some kinds of FP comparisons need an OR operation;
under flag_finite_math_only we don't bother. */
if (FLOAT_MODE_P (mode)
+ && !FLOAT128_IEEE_P (mode)
&& !flag_finite_math_only
&& !(TARGET_HARD_FLOAT && !TARGET_FPRS)
&& (code == LE || code == GE
@@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
}
+/* Expand floating point conversion to/from __float128 and __ibm128. */
+
+void
+rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
+{
+ machine_mode dest_mode = GET_MODE (dest);
+ machine_mode src_mode = GET_MODE (src);
+ convert_optab cvt = unknown_optab;
+ rtx libfunc = NULL_RTX;
+ rtx dest2;
+
+ if (dest_mode == src_mode)
+ gcc_unreachable ();
+
+ if (FLOAT128_IEEE_P (dest_mode))
+ {
+ if (src_mode == SFmode
+ || src_mode == DFmode
+ || FLOAT128_IBM_P (src_mode))
+ cvt = sext_optab;
+
+ else if (GET_MODE_CLASS (src_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ emit_move_insn (dest, gen_lowpart (dest_mode, src));
+ return;
+ }
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ if (dest_mode == SFmode
+ || dest_mode == DFmode
+ || FLOAT128_IBM_P (dest_mode))
+ cvt = trunc_optab;
+
+ else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+
+ else
+ gcc_unreachable ();
+ }
+
+ else
+ gcc_unreachable ();
+
+ gcc_assert (cvt != unknown_optab);
+ libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
+ gcc_assert (libfunc != NULL_RTX);
+
+ dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
+ src_mode);
+
+ gcc_assert (dest != NULL_RTX);
+ if (!rtx_equal_p (dest, dest2))
+ emit_move_insn (dest, dest2);
+
+ return;
+}
+
/* Emit the RTL for an sISEL pattern. */
void
@@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
|| ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
+ && !DECL_WEAK (decl)
&& (*targetm.binds_local_p) (decl))
|| (DEFAULT_ABI == ABI_V4
&& (!TARGET_SECURE_PLT
@@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
+ { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
+ false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
@@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base
/* A nonzero value indicates what kind of special handling for this
insn is required if doublewords are swapped. Undefined if
is_swappable is not set. */
- unsigned int special_handling : 3;
+ unsigned int special_handling : 4;
/* Set if the web represented by this entry cannot be optimized. */
unsigned int web_not_optimizable : 1;
/* Set if this insn should be deleted. */
@@ -34800,7 +35001,9 @@ enum special_handling_values {
SH_NOSWAP_LD,
SH_NOSWAP_ST,
SH_EXTRACT,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_CONCAT
};
/* Union INSN with all insns containing definitions that reach USE.
@@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
*special = SH_EXTRACT;
return 1;
}
+ /* An XXPERMDI is ok if we adjust the lanes. Note that if the
+ XXPERMDI is a swap operation, it will be identified by
+ insn_is_swap_p and therefore we won't get here. */
+ else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+ && (GET_MODE (XEXP (op, 0)) == V4DFmode
+ || GET_MODE (XEXP (op, 0)) == V4DImode)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 2
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+ && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+ {
+ *special = SH_XXPERMDI;
+ return 1;
+ }
else
return 0;
@@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
return 1;
}
+ /* A concatenation of two doublewords is ok if we reverse the
+ order of the inputs. */
+ if (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+ && (GET_MODE (SET_SRC (body)) == V2DFmode
+ || GET_MODE (SET_SRC (body)) == V2DImode))
+ {
+ *special = SH_CONCAT;
+ return 1;
+ }
+
/* Otherwise check the operands for vector lane violations. */
return rtx_is_swappable_p (body, special);
}
@@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn)
fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+ swap), reverse the order of the source operands and adjust the indices
+ of the source lanes to account for doubleword reversal. */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx select = XEXP (set, 1);
+ rtx concat = XEXP (select, 0);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ rtx parallel = XEXP (select, 1);
+ int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+ int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+ int new_lane0 = 3 - lane1;
+ int new_lane1 = 3 - lane0;
+ XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+ XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+ reverse the order of those inputs. */
+static void
+adjust_concat (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx concat = XEXP (set, 1);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
/* The insn described by INSN_ENTRY[I] can be swapped, but only
with special handling. Take care of that here. */
static void
@@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
/* Change the lane on a direct-splat operation. */
adjust_splat (insn);
break;
+ case SH_XXPERMDI:
+ /* Change the lanes on an XXPERMDI operation. */
+ adjust_xxpermdi (insn);
+ break;
+ case SH_CONCAT:
+ /* Reverse the order of a concatenation operation. */
+ adjust_concat (insn);
+ break;
}
}
@@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
fputs ("special:extract ", dump_file);
else if (insn_entry[i].special_handling == SH_SPLAT)
fputs ("special:splat ", dump_file);
+ else if (insn_entry[i].special_handling == SH_XXPERMDI)
+ fputs ("special:xxpermdi ", dump_file);
+ else if (insn_entry[i].special_handling == SH_CONCAT)
+ fputs ("special:concat ", dump_file);
}
if (insn_entry[i].web_not_optimizable)
fputs ("unoptimizable ", dump_file);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 527ad985423..cfdb286a2cb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -348,6 +348,8 @@
&& TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_FLOAT128")
+ (KF "TARGET_FLOAT128")
(DD "TARGET_DFP")
(TD "TARGET_DFP")])
@@ -365,9 +367,14 @@
(define_mode_iterator FMOVE32 [SF SD])
(define_mode_iterator FMOVE64 [DF DD])
(define_mode_iterator FMOVE64X [DI DF DD])
-(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
+(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)")
+ (IF "FLOAT128_2REG_P (IFmode)")
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+
; Iterators for 128 bit types for direct move
(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
(V16QI "")
@@ -376,7 +383,13 @@
(V4SF "")
(V2DI "")
(V2DF "")
- (V1TI "")])
+ (V1TI "")
+ (KF "")
+ (TF "")
+ (IF "")])
+
+; Iterator for 128-bit VSX types for pack/unpack
+(define_mode_iterator FMOVE128_VSX [V1TI KF])
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
@@ -432,6 +445,25 @@
; Iterator for just SF/DF
(define_mode_iterator SFDF [SF DF])
+; Iterator for float128 floating conversions
+(define_mode_iterator FLOAT128_SFDFTF [
+ (SF "TARGET_FLOAT128")
+ (DF "TARGET_FLOAT128")
+ (TF "FLOAT128_IBM_P (TFmode)")
+ (IF "TARGET_FLOAT128")])
+
+; Iterator for special 128-bit floating point. This is for non-default
+; conversions, so TFmode is not used here.
+(define_mode_iterator IFKF [IF KF])
+
+; Iterator for 128-bit floating point that uses the IBM double-double format
+(define_mode_iterator IBM128 [IF TF])
+
+; Iterator for 128-bit floating point
+(define_mode_iterator TFIFKF [(KF "TARGET_FLOAT128")
+ (IF "TARGET_FLOAT128")
+ (TF "TARGET_LONG_DOUBLE_128")])
+
; SF/DF suffix for traditional floating instructions
(define_mode_attr Ftrad [(SF "s") (DF "")])
@@ -596,7 +628,7 @@
;; Reload iterator for creating the function to allocate a base register to
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
- SF SD SI DF DD DI TI PTI])
+ SF SD SI DF DD DI TI PTI KF IF TF])
;; Start with fixed-point load and store insns. Here we put only the more
@@ -3037,15 +3069,15 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
-(define_insn_and_split "*and<mode>3_imm_dot_shifted"
- [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+(define_insn "*and<mode>3_imm_dot_shifted"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
(compare:CC
(and:GPR
- (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
- (match_operand:SI 4 "const_int_operand" "n,n"))
- (match_operand:GPR 2 "const_int_operand" "n,n"))
+ (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r")
+ (match_operand:SI 4 "const_int_operand" "n"))
+ (match_operand:GPR 2 "const_int_operand" "n"))
(const_int 0)))
- (clobber (match_scratch:GPR 0 "=r,r"))]
+ (clobber (match_scratch:GPR 0 "=r"))]
"logical_const_operand (GEN_INT (UINTVAL (operands[2])
<< INTVAL (operands[4])),
DImode)
@@ -3054,23 +3086,10 @@
&& rs6000_gen_cell_microcode"
{
operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4]));
- if (which_alternative == 0)
- return "andi%e2. %0,%1,%u2";
- else
- return "#";
+ return "andi%e2. %0,%1,%u2";
}
- "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
- [(set (match_dup 0)
- (and:GPR (lshiftrt:GPR (match_dup 1)
- (match_dup 4))
- (match_dup 2)))
- (set (match_dup 3)
- (compare:CC (match_dup 0)
- (const_int 0)))]
- ""
[(set_attr "type" "logical")
- (set_attr "dot" "yes")
- (set_attr "length" "4,8")])
+ (set_attr "dot" "yes")])
(define_insn "and<mode>3_mask"
@@ -3664,10 +3683,10 @@
; an insert instruction, in many cases.
(define_insn_and_split "*ior<mode>_mask"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
- (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "const_int_operand" "n")))]
- "can_create_pseudo_p ()
- && !logical_const_operand (operands[2], <MODE>mode)
+ (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0")
+ (match_operand:GPR 2 "const_int_operand" "n")))
+ (clobber (match_scratch:GPR 3 "=r"))]
+ "!logical_const_operand (operands[2], <MODE>mode)
&& rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)"
"#"
"&& 1"
@@ -3682,7 +3701,8 @@
{
int nb, ne;
rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode);
- operands[3] = gen_reg_rtx (<MODE>mode);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = GEN_INT (ne);
operands[5] = GEN_INT (~UINTVAL (operands[2]));
}
@@ -4216,19 +4236,18 @@
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
;; builtins.c and optabs.c that are not correct for IBM long double
;; when little-endian.
-(define_expand "signbittf2"
+(define_expand "signbit<mode>2"
[(set (match_dup 2)
- (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+ (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(subreg:DI (match_dup 2) 0))
(set (match_dup 4)
(match_dup 5))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(match_dup 6))]
- "!TARGET_IEEEQUAD
+ "FLOAT128_IBM_P (<MODE>mode)
&& TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)"
{
operands[2] = gen_reg_rtx (DFmode);
operands[3] = gen_reg_rtx (DImode);
@@ -6402,9 +6421,10 @@
;; problematical. Don't allow direct move for this case.
(define_insn_and_split "*mov<mode>_64bit_dm"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
+ && FLOAT128_2REG_P (<MODE>mode)
&& (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -6427,9 +6447,12 @@
[(set_attr "length" "8,8,8,8,12,12,8")])
(define_insn_and_split "*mov<mode>_32bit"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
+ && (FLOAT128_2REG_P (<MODE>mode)
+ || int_reg_operand_not_pseudo (operands[0], <MODE>mode)
+ || int_reg_operand_not_pseudo (operands[1], <MODE>mode))
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
"#"
@@ -6453,12 +6476,12 @@
(define_expand "extenddftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:DF 1 "input_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_extenddftf2 (operands[0], operands[1]));
else
emit_insn (gen_extenddftf2_fprs (operands[0], operands[1]));
@@ -6507,25 +6530,34 @@
(define_expand "extendsftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- rtx tmp = gen_reg_rtx (DFmode);
- emit_insn (gen_extendsfdf2 (tmp, operands[1]));
- emit_insn (gen_extenddftf2 (operands[0], tmp));
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else
+ {
+ rtx tmp = gen_reg_rtx (DFmode);
+ emit_insn (gen_extendsfdf2 (tmp, operands[1]));
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ }
DONE;
})
(define_expand "trunctfdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
- "")
+{
+ if (TARGET_IEEEQUAD)
+ {
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+ }
+})
(define_insn_and_split "trunctfdf2_internal1"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
@@ -6556,12 +6588,13 @@
(define_expand "trunctfsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1]));
else
emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1]));
@@ -6612,10 +6645,12 @@
(define_expand "fix_trunctfsi2"
[(set (match_operand:SI 0 "gpc_reg_operand" "")
(fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1]));
else
emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1]));
@@ -6663,20 +6698,73 @@
DONE;
})
-(define_expand "negtf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
- "")
+(define_expand "fix_trunctfdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunctf<mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "floatditf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float:TF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<mode>tf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (unsigned_float:TF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
+ "
+{
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ emit_insn (gen_ieee_128bit_vsx_neg<mode>2 (operands[0], operands[1]));
+ else
+ {
+ rtx libfunc = optab_libfunc (neg_optab, <MODE>mode);
+ rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST,
+ <MODE>mode, 1,
+ operands[1], <MODE>mode);
+
+ if (target && !rtx_equal_p (target, operands[0]))
+ emit_move_insn (operands[0], target);
+ }
+ DONE;
+ }
+}")
(define_insn "negtf2_internal"
[(set (match_operand:TF 0 "gpc_reg_operand" "=d")
(neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)"
"*
{
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
@@ -6687,16 +6775,29 @@
[(set_attr "type" "fp")
(set_attr "length" "8")])
-(define_expand "abstf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+(define_expand "abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
"
{
- rtx label = gen_label_rtx ();
+ rtx label;
+
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ {
+ emit_insn (gen_ieee_128bit_vsx_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+
+ label = gen_label_rtx ();
if (TARGET_E500_DOUBLE)
{
if (flag_finite_math_only && !flag_trapping_math)
@@ -6732,6 +6833,184 @@
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")
+
+
+;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector
+;; register
+
+(define_expand "ieee_128bit_negative_zero"
+ [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))]
+ "TARGET_FLOAT128"
+{
+ rtvec v = rtvec_alloc (16);
+ int i, high;
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = const0_rtx;
+
+ high = (BYTES_BIG_ENDIAN) ? 0 : 15;
+ RTVEC_ELT (v, high) = GEN_INT (0x80);
+
+ rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v));
+ DONE;
+})
+
+;; IEEE 128-bit negate
+
+;; We have 2 insns here for negate and absolute value. The first uses
+;; match_scratch so that phases like combine can recognize neg/abs as generic
+;; insns, and second insn after the first split pass loads up the bit to
+;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of
+;; neg/abs to create the constant just once.
+
+(define_insn_and_split "ieee_128bit_vsx_neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (neg:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_neg<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlxor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit absolute value
+(define_insn_and_split "ieee_128bit_vsx_abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_abs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit negative absolute value
+(define_insn_and_split "*ieee_128bit_vsx_nabs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Float128 conversion functions. These expand to library function calls.
+
+(define_expand "extend<FLOAT128_SFDFTF:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float_extend:IFKF
+ (match_operand:FLOAT128_SFDFTF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "trunc<IFKF:mode><FLOAT128_SFDFTF:mode>2"
+ [(set (match_operand:FLOAT128_SFDFTF 0 "nonimmediate_operand" "")
+ (float_truncate:FLOAT128_SFDFTF
+ (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fix_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "float<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float:KF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (unsigned_float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
;; Reload helper functions used by rs6000_secondary_reload. The patterns all
;; must have 3 arguments, and scratch register constraint must be a single
@@ -9516,7 +9795,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -12134,7 +12413,10 @@
;; Pack/unpack 128-bit floating point types that take 2 scalar registers
; Type of the 64-bit part when packing/unpacking 128-bit floating point types
-(define_mode_attr FP128_64 [(TF "DF") (TD "DI")])
+(define_mode_attr FP128_64 [(TF "DF")
+ (IF "DF")
+ (TD "DI")
+ (KF "DI")])
(define_expand "unpack<mode>"
[(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "")
@@ -12142,7 +12424,7 @@
[(match_operand:FMOVE128 1 "register_operand" "")
(match_operand:QI 2 "const_0_to_1_operand" "")]
UNSPEC_UNPACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"")
(define_insn_and_split "unpack<mode>_dm"
@@ -12151,7 +12433,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r")
(match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12175,7 +12457,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d")
(match_operand:QI 2 "const_0_to_1_operand" "i,i")]
UNSPEC_UNPACK_128BIT))]
- "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE"
+ "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12199,7 +12481,7 @@
[(match_operand:<FP128_64> 1 "register_operand" "0,d")
(match_operand:<FP128_64> 2 "register_operand" "d,d")]
UNSPEC_PACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"@
fmr %L0,%2
#"
@@ -12219,12 +12501,12 @@
[(set_attr "type" "fp,fp")
(set_attr "length" "4,8")])
-(define_insn "unpackv1ti"
+(define_insn "unpack<mode>"
[(set (match_operand:DI 0 "register_operand" "=d,d")
- (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa")
+ (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa")
(match_operand:QI 2 "const_0_to_1_operand" "O,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_VSX"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0)
return ASM_COMMENT_START " xxpermdi to same register";
@@ -12232,19 +12514,17 @@
operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3);
return "xxpermdi %x0,%x1,%x1,%3";
}
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
-(define_insn "packv1ti"
- [(set (match_operand:V1TI 0 "register_operand" "=wa")
- (unspec:V1TI
+(define_insn "pack<mode>"
+ [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa")
+ (unspec:FMOVE128_VSX
[(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_PACK_128BIT))]
"TARGET_VSX"
"xxpermdi %x0,%x1,%x2,0"
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 18ea27a3d90..6d11ff7dfdb 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -212,7 +212,7 @@ Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save
; Allow/disallow the movmisalign in DF/DI vectors
mefficient-unaligned-vector
-Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1)
+Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags)
; Consider unaligned VSX accesses to be efficient/inefficient
mallow-df-permute
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index 24618e309f1..f48af43e7c5 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{R*} \
%(link_shlib) \
%{!T*: %(link_start) } \
-%(link_target) \
%(link_os)"
/* Shared libraries are not default. */
@@ -584,10 +583,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{shared:-G -dy -z text } \
%{symbolic:-Bsymbolic -G -dy -z text }"
-/* Override the default target of the linker. */
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT("", " --oformat elf32-powerpcle", "")
-
/* Any specific OS flags. */
#define LINK_OS_SPEC "\
%{mads : %(link_os_ads) ; \
@@ -873,7 +868,6 @@ ncrtn.o%s"
{ "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \
{ "endfile_default", ENDFILE_DEFAULT_SPEC }, \
{ "link_shlib", LINK_SHLIB_SPEC }, \
- { "link_target", LINK_TARGET_SPEC }, \
{ "link_start", LINK_START_SPEC }, \
{ "link_start_ads", LINK_START_ADS_SPEC }, \
{ "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \
diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h
index 7b1d6a1b4de..66ee7cadfe4 100644
--- a/gcc/config/rs6000/sysv4le.h
+++ b/gcc/config/rs6000/sysv4le.h
@@ -25,10 +25,6 @@
#undef DEFAULT_ASM_ENDIAN
#define DEFAULT_ASM_ENDIAN " -mlittle"
-#undef LINK_TARGET_SPEC
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT(" --oformat elf32-powerpc", "", "")
-
#undef MULTILIB_DEFAULTS
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4a62fbbbdd4..8821dec5989 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -977,6 +977,8 @@
;; General shift amounts can be supported using vsro + vsr. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as
+;; 'shift toward element 0' so is a shr for LE and shl for BE.
(define_expand "vec_shr_<mode>"
[(match_operand:VEC_L 0 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")
@@ -987,6 +989,7 @@
rtx bitshift = operands[2];
rtx shift;
rtx insn;
+ rtx zero_reg, op1, op2;
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
@@ -996,19 +999,29 @@
if (bitshift_val & 0x7)
FAIL;
byteshift_val = (bitshift_val >> 3);
+ zero_reg = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode));
if (!BYTES_BIG_ENDIAN)
- byteshift_val = 16 - byteshift_val;
+ {
+ byteshift_val = 16 - byteshift_val;
+ op1 = zero_reg;
+ op2 = operands[1];
+ }
+ else
+ {
+ op1 = operands[1];
+ op2 = zero_reg;
+ }
+
if (TARGET_VSX && (byteshift_val & 0x3) == 0)
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
- insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift);
}
else
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift);
}
emit_insn (insn);
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 8b124759b0b..6faf7719a9a 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -2315,7 +2315,7 @@
emit_move_insn (str1, force_operand (XEXP (operands[1], 0), NULL_RTX));
emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
- emit_move_insn (len, force_operand (operands[3], NULL_RTX));
+ emit_move_insn (len, operands[3]);
emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
DONE;
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 0a24da9bcb1..b267b04e2a7 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -438,15 +438,15 @@ B_DEF (s390_vllezf, vec_insert_and_zerov4si,0,
B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, BT_FN_UV2DI_ULONGLONGCONSTPTR)
OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT)
-OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U3, BT_OV_V16QI_SCHARCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U3, BT_OV_UV16QI_UCHARCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U3, BT_OV_V8HI_SHORTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U3, BT_OV_UV8HI_USHORTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U3, BT_OV_V4SI_INTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U3, BT_OV_UV4SI_UINTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U3, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U3, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U3, BT_OV_V2DF_DBLCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT)
B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT)
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 5814694adbc..cbfc80073c9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2258,23 +2258,14 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
unsigned HOST_WIDE_INT mask;
int length, size;
+ rtx elt;
- if (!VECTOR_MODE_P (GET_MODE (op))
- || GET_CODE (op) != CONST_VECTOR
- || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ if (!const_vec_duplicate_p (op, &elt)
+ || !CONST_INT_P (elt))
return false;
- if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
- {
- int i;
-
- for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
- if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
- return false;
- }
-
size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
- mask = UINTVAL (XVECEXP (op, 0, 0));
+ mask = UINTVAL (elt);
if (s390_contiguous_bitmask_p (mask, size, start,
end != NULL ? &length : NULL))
{
@@ -10360,6 +10351,7 @@ s390_emit_prologue (void)
current_function_name(), cfun_frame_layout.frame_size,
s390_stack_size);
emit_insn (gen_trap ());
+ emit_barrier ();
}
else
{
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 5a552e2be81..3e4211be4de 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -159,6 +159,7 @@ extern int sh_eval_treg_value (rtx op);
extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a);
extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx);
+extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool);
/* Result value of sh_find_set_of_reg. */
struct set_of_reg
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 450d634e246..1442b7fc790 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -1604,6 +1604,10 @@ sh_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, XVECEXP (x, 0, 0));
fputs ("@GOTPLT", file);
break;
+ case UNSPEC_PCREL:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@PCREL", file);
+ break;
case UNSPEC_DTPOFF:
output_addr_const (file, XVECEXP (x, 0, 0));
fputs ("@DTPOFF", file);
@@ -10441,6 +10445,7 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_DTPOFF
|| XINT (x, 1) == UNSPEC_TPOFF
|| XINT (x, 1) == UNSPEC_PLT
+ || XINT (x, 1) == UNSPEC_PCREL
|| XINT (x, 1) == UNSPEC_SYMOFF
|| XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
return false;
@@ -10714,7 +10719,8 @@ sh_delegitimize_address (rtx orig_x)
rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
if (GET_CODE (symplt) == UNSPEC
- && XINT (symplt, 1) == UNSPEC_PLT)
+ && (XINT (symplt, 1) == UNSPEC_PLT
+ || XINT (symplt, 1) == UNSPEC_PCREL))
return XVECEXP (symplt, 0, 0);
}
}
@@ -11702,9 +11708,24 @@ sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
&& (! flag_pic
- || (decl && ! TREE_PUBLIC (decl))
+ || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
+
+/* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
+void
+sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
+{
+ const_tree decl = SYMBOL_REF_DECL (sym);
+ bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
+
+ if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
+ emit_insn (gen_sym_label2reg (reg, sym, lab));
+ else if (sibcall_p)
+ emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
+ else
+ emit_insn (gen_symPLT_label2reg (reg, sym, lab));
+}
/* Machine specific built-in functions. */
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index ad49f72c68d..4e7cd169f84 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -135,6 +135,7 @@
UNSPEC_PLT
UNSPEC_CALLER
UNSPEC_GOTPLT
+ UNSPEC_PCREL
UNSPEC_ICACHE
UNSPEC_INIT_TRAMP
UNSPEC_FCOSA
@@ -9470,11 +9471,8 @@ label:
[(const_int 0)]
{
rtx lab = PATTERN (gen_call_site ());
-
- if (SYMBOL_REF_LOCAL_P (operands[0]))
- emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
- else
- emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab));
+
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, false);
emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab)));
DONE;
}
@@ -9605,10 +9603,7 @@ label:
{
rtx lab = PATTERN (gen_call_site ());
- if (SYMBOL_REF_LOCAL_P (operands[1]))
- emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
- else
- emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab));
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, false);
emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3],
operands[2], copy_rtx (lab)));
DONE;
@@ -10008,7 +10003,7 @@ label:
rtx lab = PATTERN (gen_call_site ());
rtx call_insn;
- emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1],
copy_rtx (lab)));
SIBLING_CALL_P (call_insn) = 1;
@@ -10200,7 +10195,7 @@ label:
rtx lab = PATTERN (gen_call_site ());
rtx call_insn;
- emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0],
operands[3],
operands[2],
@@ -10748,6 +10743,16 @@ label:
UNSPEC_SYMOFF)))]
"TARGET_SH1" "")
+(define_expand "symPCREL_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI
+ (unspec:SI
+ [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PCREL))
+ (const:SI (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))] UNSPEC_PCREL_SYMOFF)))]
+ "TARGET_SH1"
+ "")
+
(define_expand "symGOT_load"
[(set (match_dup 2) (match_operand 1 "" ""))
(set (match_dup 3) (plus (match_dup 2) (reg PIC_REG)))
@@ -12731,7 +12736,7 @@ label:
[(set (match_operand:SI 0 "register_operand")
(compare:SI (match_operand:BLK 1 "memory_operand")
(match_operand:BLK 2 "memory_operand")))
- (use (match_operand:SI 3 "immediate_operand"))
+ (use (match_operand:SI 3 "nonmemory_operand"))
(use (match_operand:SI 4 "immediate_operand"))]
"TARGET_SH1 && optimize"
{
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 9665ee6da9b..5b9f0517b90 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -6403,7 +6403,7 @@
/* Pass constm1 to indicate that it may expect a structure value, but
we don't know what size it is. */
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, constm1_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, constm1_rtx));
/* Save the function value registers. */
emit_move_insn (adjust_address (result, DImode, 0), valreg1);
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index ca762877a0f..05c81f5ed73 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -3185,11 +3185,8 @@ classify_immediate (rtx op, machine_mode mode)
&& mode == V4SImode
&& GET_CODE (op) == CONST_VECTOR
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
- && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
- op = CONST_VECTOR_ELT (op, 0);
+ && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
+ op = unwrap_const_vec_duplicate (op);
switch (GET_CODE (op))
{
@@ -3507,9 +3504,7 @@ spu_legitimate_constant_p (machine_mode mode, rtx x)
&& (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
- return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
- && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
- && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
+ return const_vec_duplicate_p (x);
if (GET_CODE (x) == CONST_VECTOR
&& !const_vector_immediate_p (x))
diff --git a/gcc/config/tilegx/constraints.md b/gcc/config/tilegx/constraints.md
index 783e1ca98fe..f47d0f68296 100644
--- a/gcc/config/tilegx/constraints.md
+++ b/gcc/config/tilegx/constraints.md
@@ -96,21 +96,14 @@
"An 8-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 8")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)")))
+ (match_test "const_vec_duplicate_p (op)")))
(define_constraint "Y"
"A 4-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))
+ (match_test "const_vec_duplicate_p (op)")))
+
(define_constraint "Z0"
"The integer constant 0xffffffff"
(and (match_code "const_int")
diff --git a/gcc/config/tilegx/predicates.md b/gcc/config/tilegx/predicates.md
index 4cbebf18a91..ce04660f9ed 100644
--- a/gcc/config/tilegx/predicates.md
+++ b/gcc/config/tilegx/predicates.md
@@ -112,14 +112,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 8
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if OP is a 4-element vector constant with identical signed
;; 8-bit elements or any register.
@@ -127,10 +121,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if the operand is a valid second operand to an add insn.
(define_predicate "add_operand"
diff --git a/gcc/config/tilegx/tilegx.md b/gcc/config/tilegx/tilegx.md
index 75322e16721..944953c34b2 100644
--- a/gcc/config/tilegx/tilegx.md
+++ b/gcc/config/tilegx/tilegx.md
@@ -2670,7 +2670,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/tilepro/constraints.md b/gcc/config/tilepro/constraints.md
index 4d13fb0640a..3ab9ab75650 100644
--- a/gcc/config/tilepro/constraints.md
+++ b/gcc/config/tilepro/constraints.md
@@ -90,12 +90,10 @@
"A 4-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))
+ (match_test "const_vec_duplicate_p (op)")))
(define_constraint "Y"
"A 2-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 2")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")))
+ (match_test "const_vec_duplicate_p (op)")))
diff --git a/gcc/config/tilepro/predicates.md b/gcc/config/tilepro/predicates.md
index 00d2bb989cd..ab62d20731a 100644
--- a/gcc/config/tilepro/predicates.md
+++ b/gcc/config/tilepro/predicates.md
@@ -75,10 +75,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if OP is a 2-element vector constant with identical signed
;; 8-bit elements or any register.
@@ -86,8 +84,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 2
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if the operand is a valid second operand to an add insn.
(define_predicate "add_operand"
diff --git a/gcc/config/tilepro/tilepro.md b/gcc/config/tilepro/tilepro.md
index a97ebf9eb22..b1e6b81e71f 100644
--- a/gcc/config/tilepro/tilepro.md
+++ b/gcc/config/tilepro/tilepro.md
@@ -1516,7 +1516,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/visium/visium.md b/gcc/config/visium/visium.md
index 969cb887a6c..370b6a4b5b2 100644
--- a/gcc/config/visium/visium.md
+++ b/gcc/config/visium/visium.md
@@ -2375,7 +2375,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{