summaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2016-02-10 17:20:51 +0000
committerbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2016-02-10 17:20:51 +0000
commit2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config
parentc8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
downloadgcc-2d9d01985a7a7866916fafa19c5c296702e69714.tar.gz
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9 svn merge -r227001:227400 ^/trunk ; there is some gengtype issue before svn r228000... }} git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-builtins.c25
-rw-r--r--gcc/config/aarch64/aarch64-fusion-pairs.def18
-rw-r--r--gcc/config/aarch64/aarch64-option-extensions.def5
-rw-r--r--gcc/config/aarch64/aarch64-protos.h65
-rw-r--r--gcc/config/aarch64/aarch64-tuning-flags.def8
-rw-r--r--gcc/config/aarch64/aarch64.c290
-rw-r--r--gcc/config/aarch64/aarch64.h12
-rw-r--r--gcc/config/aarch64/aarch64.md182
-rw-r--r--gcc/config/aarch64/aarch64.opt19
-rw-r--r--gcc/config/aarch64/iterators.md6
-rw-r--r--gcc/config/aarch64/thunderx.md151
-rw-r--r--gcc/config/alpha/alpha.c2
-rw-r--r--gcc/config/alpha/alpha.md2
-rw-r--r--gcc/config/arm/arm-arches.def63
-rw-r--r--gcc/config/arm/arm-builtins.c71
-rw-r--r--gcc/config/arm/arm-cores.def202
-rw-r--r--gcc/config/arm/arm-protos.h110
-rw-r--r--gcc/config/arm/arm.c181
-rw-r--r--gcc/config/arm/arm.md5
-rw-r--r--gcc/config/arm/arm_neon.h10
-rw-r--r--gcc/config/arm/constraints.md5
-rw-r--r--gcc/config/avr/avr.c6
-rw-r--r--gcc/config/cr16/cr16.c2
-rw-r--r--gcc/config/i386/i386.c55
-rw-r--r--gcc/config/i386/i386.md178
-rw-r--r--gcc/config/i386/predicates.md22
-rw-r--r--gcc/config/iq2000/iq2000.md2
-rw-r--r--gcc/config/m32c/blkmov.md8
-rw-r--r--gcc/config/m68k/m68k.md2
-rw-r--r--gcc/config/microblaze/microblaze.c2
-rw-r--r--gcc/config/mips/mips-opts.h6
-rw-r--r--gcc/config/mips/mips-protos.h3
-rw-r--r--gcc/config/mips/mips.c402
-rw-r--r--gcc/config/mips/mips.h76
-rw-r--r--gcc/config/mips/mips.md215
-rw-r--r--gcc/config/mips/mips.opt17
-rw-r--r--gcc/config/mips/predicates.md13
-rw-r--r--gcc/config/msp430/msp430.opt2
-rw-r--r--gcc/config/nvptx/mkoffload.c24
-rw-r--r--gcc/config/nvptx/nvptx.c30
-rw-r--r--gcc/config/nvptx/nvptx.h2
-rw-r--r--gcc/config/nvptx/nvptx.md6
-rw-r--r--gcc/config/pa/pa.md2
-rw-r--r--gcc/config/rs6000/altivec.h2
-rw-r--r--gcc/config/rs6000/predicates.md31
-rw-r--r--gcc/config/rs6000/rs6000-builtin.def5
-rw-r--r--gcc/config/rs6000/rs6000-c.c21
-rw-r--r--gcc/config/rs6000/rs6000-cpus.def2
-rw-r--r--gcc/config/rs6000/rs6000-protos.h1
-rw-r--r--gcc/config/rs6000/rs6000.c343
-rw-r--r--gcc/config/rs6000/rs6000.md458
-rw-r--r--gcc/config/rs6000/rs6000.opt2
-rw-r--r--gcc/config/rs6000/sysv4.h6
-rw-r--r--gcc/config/rs6000/sysv4le.h4
-rw-r--r--gcc/config/rs6000/vector.md23
-rw-r--r--gcc/config/rx/rx.md2
-rw-r--r--gcc/config/s390/s390-builtins.def18
-rw-r--r--gcc/config/s390/s390.c18
-rw-r--r--gcc/config/sh/sh-protos.h1
-rw-r--r--gcc/config/sh/sh.c25
-rw-r--r--gcc/config/sh/sh.md29
-rw-r--r--gcc/config/sparc/sparc.md2
-rw-r--r--gcc/config/spu/spu.c11
-rw-r--r--gcc/config/tilegx/constraints.md13
-rw-r--r--gcc/config/tilegx/predicates.md16
-rw-r--r--gcc/config/tilegx/tilegx.md2
-rw-r--r--gcc/config/tilepro/constraints.md6
-rw-r--r--gcc/config/tilepro/predicates.md10
-rw-r--r--gcc/config/tilepro/tilepro.md2
-rw-r--r--gcc/config/visium/visium.md2
70 files changed, 2524 insertions, 1038 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 0f4f2b97022..e3a90b5e4dd 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -886,30 +886,6 @@ typedef enum
SIMD_ARG_STOP
} builtin_simd_arg;
-/* Relayout the decl of a function arg. Keep the RTL component the same,
- as varasm.c ICEs. It doesn't like reinitializing the RTL
- on PARM decls. Something like this needs to be done when compiling a
- file without SIMD and then tagging a function with +simd and using SIMD
- intrinsics in there. The types will have been laid out assuming no SIMD,
- so we want to re-lay them out. */
-
-static void
-aarch64_relayout_simd_param (tree arg)
-{
- tree argdecl = arg;
- if (TREE_CODE (argdecl) == SSA_NAME)
- argdecl = SSA_NAME_VAR (argdecl);
-
- if (argdecl
- && (TREE_CODE (argdecl) == PARM_DECL
- || TREE_CODE (argdecl) == VAR_DECL))
- {
- rtx rtl = NULL_RTX;
- rtl = DECL_RTL_IF_SET (argdecl);
- relayout_decl (argdecl);
- SET_DECL_RTL (argdecl, rtl);
- }
-}
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
@@ -940,7 +916,6 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
{
tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
enum machine_mode mode = insn_data[icode].operand[opc].mode;
- aarch64_relayout_simd_param (arg);
op[opc] = expand_normal (arg);
switch (thisarg)
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index a7b00f6975d..53bbef46eb2 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -20,19 +20,17 @@
/* Pairs of instructions which can be fused. before including this file,
define a macro:
- AARCH64_FUSION_PAIR (name, internal_name, index_bit)
+ AARCH64_FUSION_PAIR (name, internal_name)
Where:
NAME is a string giving a friendly name for the instructions to fuse.
INTERNAL_NAME gives the internal name suitable for appending to
- AARCH64_FUSE_ to give an enum name.
- INDEX_BIT is the bit to set in the bitmask of supported fusion
- operations. */
-
-AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0)
-AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1)
-AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2)
-AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3)
-AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4)
+ AARCH64_FUSE_ to give an enum name. */
+
+AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK)
+AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD)
+AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
+AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
+AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1762cc8d58f..b261a0f7c3c 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -34,11 +34,6 @@
should contain a whitespace-separated list of the strings in 'Features'
that are required. Their order is not important. */
-/* V8 Architecture Extensions.
- This list currently contains example extensions for CPUs that implement
- AArch64, and therefore serves as a template for adding more CPUs in the
- future. */
-
AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp")
AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd")
AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 0b09d49f670..ff1985137b3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -73,8 +73,12 @@ enum aarch64_symbol_context
SYMBOL_SMALL_TLSGD
SYMBOL_SMALL_TLSDESC
- SYMBOL_SMALL_GOTTPREL
- SYMBOL_TLSLE
+ SYMBOL_SMALL_TLSIE
+ SYMBOL_TINY_TLSIE
+ SYMBOL_TLSLE12
+ SYMBOL_TLSLE24
+ SYMBOL_TLSLE32
+ SYMBOL_TLSLE48
Each of these represents a thread-local symbol, and corresponds to the
thread local storage relocation operator for the symbol being referred to.
@@ -108,10 +112,14 @@ enum aarch64_symbol_type
SYMBOL_SMALL_GOT_4G,
SYMBOL_SMALL_TLSGD,
SYMBOL_SMALL_TLSDESC,
- SYMBOL_SMALL_GOTTPREL,
+ SYMBOL_SMALL_TLSIE,
SYMBOL_TINY_ABSOLUTE,
SYMBOL_TINY_GOT,
- SYMBOL_TLSLE,
+ SYMBOL_TINY_TLSIE,
+ SYMBOL_TLSLE12,
+ SYMBOL_TLSLE24,
+ SYMBOL_TLSLE32,
+ SYMBOL_TLSLE48,
SYMBOL_FORCE_TO_MEM
};
@@ -201,41 +209,46 @@ struct tune_params
unsigned int extra_tuning_flags;
};
-#define AARCH64_FUSION_PAIR(x, name, index) \
- AARCH64_FUSE_##name = (1 << index),
+#define AARCH64_FUSION_PAIR(x, name) \
+ AARCH64_FUSE_##name##_index,
/* Supported fusion operations. */
-enum aarch64_fusion_pairs
+enum aarch64_fusion_pairs_index
{
- AARCH64_FUSE_NOTHING = 0,
#include "aarch64-fusion-pairs.def"
-
-/* Hacky macro to build AARCH64_FUSE_ALL. The sequence below expands
- to:
- AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */
+ AARCH64_FUSE_index_END
+};
#undef AARCH64_FUSION_PAIR
-#define AARCH64_FUSION_PAIR(x, name, y) \
- | AARCH64_FUSE_##name
- AARCH64_FUSE_ALL = 0
+#define AARCH64_FUSION_PAIR(x, name) \
+ AARCH64_FUSE_##name = (1u << AARCH64_FUSE_##name##_index),
+/* Supported fusion operations. */
+enum aarch64_fusion_pairs
+{
+ AARCH64_FUSE_NOTHING = 0,
#include "aarch64-fusion-pairs.def"
+ AARCH64_FUSE_ALL = (1u << AARCH64_FUSE_index_END) - 1
};
#undef AARCH64_FUSION_PAIR
-#define AARCH64_EXTRA_TUNING_OPTION(x, name, index) \
- AARCH64_EXTRA_TUNE_##name = (1 << index),
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+ AARCH64_EXTRA_TUNE_##name##_index,
+/* Supported tuning flags indexes. */
+enum aarch64_extra_tuning_flags_index
+{
+#include "aarch64-tuning-flags.def"
+ AARCH64_EXTRA_TUNE_index_END
+};
+#undef AARCH64_EXTRA_TUNING_OPTION
+
+
+#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
+ AARCH64_EXTRA_TUNE_##name = (1u << AARCH64_EXTRA_TUNE_##name##_index),
/* Supported tuning flags. */
enum aarch64_extra_tuning_flags
{
AARCH64_EXTRA_TUNE_NONE = 0,
#include "aarch64-tuning-flags.def"
-
-/* Hacky macro to build the "all" flag mask.
- Expands to 0 | AARCH64_TUNE_index0 | AARCH64_TUNE_index1 , etc. */
-#undef AARCH64_EXTRA_TUNING_OPTION
-#define AARCH64_EXTRA_TUNING_OPTION(x, name, y) \
- | AARCH64_EXTRA_TUNE_##name
- AARCH64_EXTRA_TUNE_ALL = 0
-#include "aarch64-tuning-flags.def"
+ AARCH64_EXTRA_TUNE_ALL = (1u << AARCH64_EXTRA_TUNE_index_END) - 1
};
#undef AARCH64_EXTRA_TUNING_OPTION
@@ -310,12 +323,14 @@ rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
bool aarch64_simd_mem_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
rtx aarch64_tls_get_addr (void);
+std::string aarch64_get_extension_string_for_isa_flags (unsigned long);
tree aarch64_fold_builtin (tree, int, tree *, bool);
unsigned aarch64_dbx_register_number (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
void aarch64_cpu_cpp_builtins (cpp_reader *);
void aarch64_elf_asm_named_section (const char *, unsigned, tree);
+const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
void aarch64_err_no_fpadvsimd (machine_mode, const char *);
void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 01aaca83594..628386b5a1d 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -20,15 +20,13 @@
/* Additional control over certain tuning parameters. Before including
this file, define a macro:
- AARCH64_EXTRA_TUNING_OPTION (name, internal_name, index_bit)
+ AARCH64_EXTRA_TUNING_OPTION (name, internal_name)
Where:
NAME is a string giving a friendly name for the tuning flag.
INTERNAL_NAME gives the internal name suitable for appending to
- AARCH64_TUNE_ to give an enum name.
- INDEX_BIT is the bit to set in the bitmask of supported tuning
- flags. */
+ AARCH64_TUNE_ to give an enum name. */
-AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS, 0)
+AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aa268aeff4d..bc612e47d4f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -150,7 +150,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
-static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
@@ -172,7 +171,7 @@ struct aarch64_flag_desc
unsigned int flag;
};
-#define AARCH64_FUSION_PAIR(name, internal_name, y) \
+#define AARCH64_FUSION_PAIR(name, internal_name) \
{ name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
@@ -183,7 +182,7 @@ static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
};
#undef AARCH64_FUION_PAIR
-#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
+#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
{ name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
@@ -587,6 +586,29 @@ static const char * const aarch64_condition_codes[] =
"hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
+/* Generate code to enable conditional branches in functions over 1 MiB. */
+const char *
+aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
+ const char * branch_format)
+{
+ rtx_code_label * tmp_label = gen_label_rtx ();
+ char label_buf[256];
+ char buffer[128];
+ ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
+ CODE_LABEL_NUMBER (tmp_label));
+ const char *label_ptr = targetm.strip_name_encoding (label_buf);
+ rtx dest_label = operands[pos_label];
+ operands[pos_label] = tmp_label;
+
+ snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
+ output_asm_insn (buffer, operands);
+
+ snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
+ operands[pos_label] = dest_label;
+ output_asm_insn (buffer, operands);
+ return "";
+}
+
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
@@ -931,7 +953,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
The generate instruction sequence for accessing global variable
is:
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
Only one instruction needed. But we must initialize
pic_offset_table_rtx properly. We generate initialize insn for
@@ -940,12 +962,12 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
The final instruction sequences will look like the following
for multiply global variables access.
- adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
+ adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
- ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
- ... */
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
+ ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
+ ... */
rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
crtl->uses_pic_offset_table = 1;
@@ -1081,7 +1103,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
{
/* In ILP32, the mode of dest can be either SImode or DImode,
while the got entry is always of SImode size. The mode of
@@ -1115,14 +1137,43 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ case SYMBOL_TLSLE24:
+ case SYMBOL_TLSLE32:
+ case SYMBOL_TLSLE48:
{
+ machine_mode mode = GET_MODE (dest);
rtx tp = aarch64_load_tp (NULL);
- if (GET_MODE (dest) != Pmode)
- tp = gen_lowpart (GET_MODE (dest), tp);
+ if (mode != Pmode)
+ tp = gen_lowpart (mode, tp);
+
+ switch (type)
+ {
+ case SYMBOL_TLSLE12:
+ emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
+ (dest, tp, imm));
+ break;
+ case SYMBOL_TLSLE24:
+ emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
+ (dest, tp, imm));
+ break;
+ case SYMBOL_TLSLE32:
+ emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
+ (dest, imm));
+ emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+ (dest, dest, tp));
+ break;
+ case SYMBOL_TLSLE48:
+ emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
+ (dest, imm));
+ emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
+ (dest, dest, tp));
+ break;
+ default:
+ gcc_unreachable ();
+ }
- emit_insn (gen_tlsle (dest, tp, imm));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -1131,6 +1182,31 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
emit_insn (gen_ldr_got_tiny (dest, imm));
return;
+ case SYMBOL_TINY_TLSIE:
+ {
+ machine_mode mode = GET_MODE (dest);
+ rtx tp = aarch64_load_tp (NULL);
+
+ if (mode == ptr_mode)
+ {
+ if (mode == DImode)
+ emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
+ else
+ {
+ tp = gen_lowpart (mode, tp);
+ emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
+ }
+ }
+ else
+ {
+ gcc_assert (mode == Pmode);
+ emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
+ }
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ return;
+ }
+
default:
gcc_unreachable ();
}
@@ -1661,10 +1737,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_SMALL_TLSGD:
case SYMBOL_SMALL_TLSDESC:
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
case SYMBOL_SMALL_GOT_28K:
case SYMBOL_SMALL_GOT_4G:
case SYMBOL_TINY_GOT:
+ case SYMBOL_TINY_TLSIE:
if (offset != const0_rtx)
{
gcc_assert(can_create_pseudo_p ());
@@ -1677,7 +1754,10 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_SMALL_ABSOLUTE:
case SYMBOL_TINY_ABSOLUTE:
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ case SYMBOL_TLSLE24:
+ case SYMBOL_TLSLE32:
+ case SYMBOL_TLSLE48:
aarch64_load_symref_appropriately (dest, imm, sty);
return;
@@ -4163,19 +4243,6 @@ aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
-static unsigned
-bit_count (unsigned HOST_WIDE_INT value)
-{
- unsigned count = 0;
-
- while (value)
- {
- count++;
- value &= value - 1;
- }
-
- return count;
-}
/* N Z C V. */
#define AARCH64_CC_V 1
@@ -4330,7 +4397,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
return;
}
- asm_fprintf (f, "%u", bit_count (INTVAL (x)));
+ asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
break;
case 'H':
@@ -4556,11 +4623,11 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":tlsdesc:");
break;
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
asm_fprintf (asm_out_file, ":gottprel:");
break;
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel:");
break;
@@ -4589,11 +4656,15 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
break;
- case SYMBOL_SMALL_GOTTPREL:
+ case SYMBOL_SMALL_TLSIE:
asm_fprintf (asm_out_file, ":gottprel_lo12:");
break;
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE12:
+ asm_fprintf (asm_out_file, ":tprel_lo12:");
+ break;
+
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
break;
@@ -4601,6 +4672,10 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (asm_out_file, ":got:");
break;
+ case SYMBOL_TINY_TLSIE:
+ asm_fprintf (asm_out_file, ":gottprel:");
+ break;
+
default:
break;
}
@@ -4611,7 +4686,7 @@ aarch64_print_operand (FILE *f, rtx x, char code)
switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
{
- case SYMBOL_TLSLE:
+ case SYMBOL_TLSLE24:
asm_fprintf (asm_out_file, ":tprel_hi12:");
break;
default:
@@ -7506,6 +7581,40 @@ aarch64_parse_one_override_token (const char* token,
return;
}
+/* A checking mechanism for the implementation of the tls size. */
+
+static void
+initialize_aarch64_tls_size (struct gcc_options *opts)
+{
+ if (aarch64_tls_size == 0)
+ aarch64_tls_size = 24;
+
+ switch (opts->x_aarch64_cmodel_var)
+ {
+ case AARCH64_CMODEL_TINY:
+ /* Both the default and maximum TLS size allowed under tiny is 1M which
+ needs two instructions to address, so we clamp the size to 24. */
+ if (aarch64_tls_size > 24)
+ aarch64_tls_size = 24;
+ break;
+ case AARCH64_CMODEL_SMALL:
+ /* The maximum TLS size allowed under small is 4G. */
+ if (aarch64_tls_size > 32)
+ aarch64_tls_size = 32;
+ break;
+ case AARCH64_CMODEL_LARGE:
+ /* The maximum TLS size allowed under large is 16E.
+ FIXME: 16E should be 64bit, we only support 48bit offset now. */
+ if (aarch64_tls_size > 48)
+ aarch64_tls_size = 48;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
/* Parse STRING looking for options in the format:
string :: option:string
option :: name=substring
@@ -7598,6 +7707,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
}
initialize_aarch64_code_model (opts);
+ initialize_aarch64_tls_size (opts);
aarch64_override_options_after_change_1 (opts);
}
@@ -7904,20 +8014,6 @@ initialize_aarch64_code_model (struct gcc_options *opts)
aarch64_cmodel = opts->x_aarch64_cmodel_var;
}
-/* Print to F the architecture features specified by ISA_FLAGS. */
-
-static void
-aarch64_print_extension (FILE *f, unsigned long isa_flags)
-{
- const struct aarch64_option_extension *opt = NULL;
-
- for (opt = all_extensions; opt->name != NULL; opt++)
- if ((isa_flags & opt->flags_on) == opt->flags_on)
- asm_fprintf (f, "+%s", opt->name);
-
- asm_fprintf (f, "\n");
-}
-
/* Implement TARGET_OPTION_SAVE. */
static void
@@ -7950,10 +8046,12 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
= aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
unsigned long isa_flags = ptr->x_aarch64_isa_flags;
const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (isa_flags);
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
- fprintf (file, "%*sselected arch = %s", indent, "", arch->name);
- aarch64_print_extension (file, isa_flags);
+ fprintf (file, "%*sselected arch = %s%s\n", indent, "",
+ arch->name, extension.c_str ());
}
static GTY(()) tree aarch64_previous_fndecl;
@@ -8013,6 +8111,23 @@ aarch64_set_current_function (tree fndecl)
= save_target_globals_default_opts ();
}
}
+
+ if (!fndecl)
+ return;
+
+ /* If we turned on SIMD make sure that any vector parameters are re-laid out
+ so that they use proper vector modes. */
+ if (TARGET_SIMD)
+ {
+ tree parms = DECL_ARGUMENTS (fndecl);
+ for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
+ {
+ if (TREE_CODE (parms) == PARM_DECL
+ && VECTOR_TYPE_P (TREE_TYPE (parms))
+ && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
+ relayout_decl (parms);
+ }
+ }
}
/* Enum describing the various ways we can handle attributes.
@@ -8683,10 +8798,26 @@ aarch64_classify_tls_symbol (rtx x)
return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
case TLS_MODEL_INITIAL_EXEC:
- return SYMBOL_SMALL_GOTTPREL;
+ switch (aarch64_cmodel)
+ {
+ case AARCH64_CMODEL_TINY:
+ case AARCH64_CMODEL_TINY_PIC:
+ return SYMBOL_TINY_TLSIE;
+ default:
+ return SYMBOL_SMALL_TLSIE;
+ }
case TLS_MODEL_LOCAL_EXEC:
- return SYMBOL_TLSLE;
+ if (aarch64_tls_size == 12)
+ return SYMBOL_TLSLE12;
+ else if (aarch64_tls_size == 24)
+ return SYMBOL_TLSLE24;
+ else if (aarch64_tls_size == 32)
+ return SYMBOL_TLSLE32;
+ else if (aarch64_tls_size == 48)
+ return SYMBOL_TLSLE48;
+ else
+ gcc_unreachable ();
case TLS_MODEL_EMULATED:
case TLS_MODEL_NONE:
@@ -9893,31 +10024,10 @@ sizetochar (int size)
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
- int i = 0;
- REAL_VALUE_TYPE r0, ri;
- rtx x0, xi;
-
- if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
- return false;
-
- x0 = CONST_VECTOR_ELT (x, 0);
- if (!CONST_DOUBLE_P (x0))
- return false;
-
- REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
-
- for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
- {
- xi = CONST_VECTOR_ELT (x, i);
- if (!CONST_DOUBLE_P (xi))
- return false;
-
- REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
- if (!REAL_VALUES_EQUAL (r0, ri))
- return false;
- }
-
- return aarch64_float_const_representable_p (x0);
+ rtx elt;
+ return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
+ && const_vec_duplicate_p (x, &elt)
+ && aarch64_float_const_representable_p (elt));
}
/* Return true for valid and false for invalid. */
@@ -10380,28 +10490,15 @@ aarch64_simd_dup_constant (rtx vals)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- bool all_same = true;
rtx x;
- int i;
-
- if (GET_CODE (vals) != CONST_VECTOR)
- return NULL_RTX;
-
- for (i = 1; i < n_elts; ++i)
- {
- x = CONST_VECTOR_ELT (vals, i);
- if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
- all_same = false;
- }
- if (!all_same)
+ if (!const_vec_duplicate_p (vals, &x))
return NULL_RTX;
/* We can load this constant by using DUP and a constant in a
single ARM register. This will be cheaper than a vector
load. */
- x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
+ x = copy_to_mode_reg (inner_mode, x);
return gen_rtx_VEC_DUPLICATE (mode, x);
}
@@ -10677,8 +10774,11 @@ aarch64_declare_function_name (FILE *stream, const char* name,
const struct processor *this_arch
= aarch64_get_arch (targ_options->x_explicit_arch);
- asm_fprintf (asm_out_file, "\t.arch %s", this_arch->name);
- aarch64_print_extension (asm_out_file, targ_options->x_aarch64_isa_flags);
+ unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
+ std::string extension
+ = aarch64_get_extension_string_for_isa_flags (isa_flags);
+ asm_fprintf (asm_out_file, "\t.arch %s%s\n",
+ this_arch->name, extension.c_str ());
/* Print the cpu name we're tuning for in the comments, might be
useful to readers of the generated asm. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1be78fc16dd..1e5f5dbd4fa 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -887,18 +887,18 @@ extern enum aarch64_code_model aarch64_cmodel;
{"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
{"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" },
-#define BIG_LITTLE_SPEC \
- " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}"
+#define MCPU_TO_MARCH_SPEC \
+ " %{mcpu=*:-march=%:rewrite_mcpu(%{mcpu=*:%*})}"
extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
-#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \
+#define MCPU_TO_MARCH_SPEC_FUNCTIONS \
{ "rewrite_mcpu", aarch64_rewrite_mcpu },
#if defined(__aarch64__)
extern const char *host_detect_local_cpu (int argc, const char **argv);
# define EXTRA_SPEC_FUNCTIONS \
{ "local_cpu_detect", host_detect_local_cpu }, \
- BIG_LITTLE_CPU_SPEC_FUNCTIONS
+ MCPU_TO_MARCH_SPEC_FUNCTIONS
# define MCPU_MTUNE_NATIVE_SPECS \
" %{march=native:%<march=native %:local_cpu_detect(arch)}" \
@@ -906,11 +906,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
" %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
#else
# define MCPU_MTUNE_NATIVE_SPECS ""
-# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS
+# define EXTRA_SPEC_FUNCTIONS MCPU_TO_MARCH_SPEC_FUNCTIONS
#endif
#define ASM_CPU_SPEC \
- BIG_LITTLE_SPEC
+ MCPU_TO_MARCH_SPEC
#define EXTRA_SPECS \
{ "asm_cpu_spec", ASM_CPU_SPEC }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 35255e91a95..25229824fb5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -90,6 +90,7 @@
UNSPEC_GOTSMALLPIC28K
UNSPEC_GOTSMALLTLS
UNSPEC_GOTTINYPIC
+ UNSPEC_GOTTINYTLS
UNSPEC_LD1
UNSPEC_LD2
UNSPEC_LD2_DUP
@@ -117,7 +118,10 @@
UNSPEC_ST4_LANE
UNSPEC_TLS
UNSPEC_TLSDESC
- UNSPEC_TLSLE
+ UNSPEC_TLSLE12
+ UNSPEC_TLSLE24
+ UNSPEC_TLSLE32
+ UNSPEC_TLSLE48
UNSPEC_USHL_2S
UNSPEC_VSTRUCTDUMMY
UNSPEC_SP_SET
@@ -181,6 +185,13 @@
(const_string "no")
] (const_string "yes")))
+;; Attribute that specifies whether we are dealing with a branch to a
+;; label that is far away, i.e. further away than the maximum/minimum
+;; representable in a signed 21-bits number.
+;; 0 :=: no
+;; 1 :=: yes
+(define_attr "far_branch" "" (const_int 0))
+
;; -------------------------------------------------------------------
;; Pipeline descriptions and scheduling
;; -------------------------------------------------------------------
@@ -308,8 +319,23 @@
(label_ref (match_operand 2 "" ""))
(pc)))]
""
- "b%m0\\t%l2"
- [(set_attr "type" "branch")]
+ {
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t");
+ else
+ return "b%m0\\t%l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
(define_expand "casesi"
@@ -488,9 +514,23 @@
(label_ref (match_operand 1 "" ""))
(pc)))]
""
- "<cbz>\\t%<w>0, %l1"
- [(set_attr "type" "branch")]
-
+ {
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
+ else
+ return "<cbz>\\t%<w>0, %l1";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
(define_insn "*tb<optab><mode>1"
@@ -506,8 +546,14 @@
{
if (get_attr_length (insn) == 8)
{
- operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 2, "Ltb",
+ "<inv_tb>\\t%<w>0, %1, ");
+ else
+ {
+ operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+ return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ }
}
else
return "<tbz>\t%<w>0, %1, %l2";
@@ -517,7 +563,13 @@
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
(lt (minus (match_dup 2) (pc)) (const_int 32764)))
(const_int 4)
- (const_int 8)))]
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
+
)
(define_insn "*cb<optab><mode>1"
@@ -530,12 +582,18 @@
{
if (get_attr_length (insn) == 8)
{
- char buf[64];
- uint64_t val = ((uint64_t ) 1)
- << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
- sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
- output_asm_insn (buf, operands);
- return "<bcond>\t%l1";
+ if (get_attr_far_branch (insn) == 1)
+ return aarch64_gen_far_branch (operands, 1, "Ltb",
+ "<inv_tb>\\t%<w>0, <sizem1>, ");
+ else
+ {
+ char buf[64];
+ uint64_t val = ((uint64_t) 1)
+ << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
+ sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
+ output_asm_insn (buf, operands);
+ return "<bcond>\t%l1";
+ }
}
else
return "<tbz>\t%<w>0, <sizem1>, %l1";
@@ -545,7 +603,12 @@
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
(lt (minus (match_dup 1) (pc)) (const_int 32764)))
(const_int 4)
- (const_int 8)))]
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+ (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+ (const_int 0)
+ (const_int 1)))]
)
;; -------------------------------------------------------------------
@@ -768,7 +831,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -3923,6 +3986,16 @@
[(set_attr "type" "bfm")]
)
+(define_insn "*aarch64_bfi<GPI:mode><ALLX:mode>4"
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (zero_extend:GPI (match_operand:ALLX 3 "register_operand" "r")))]
+ "UINTVAL (operands[1]) <= <ALLX:sizen>"
+ "bfi\\t%<GPI:w>0, %<GPI:w>3, %2, %1"
+ [(set_attr "type" "bfm")]
+)
+
(define_insn "*extr_insv_lower_reg<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(match_operand 1 "const_int_operand" "n")
@@ -4512,31 +4585,72 @@
(set_attr "length" "8")]
)
-(define_expand "tlsle"
- [(set (match_operand 0 "register_operand" "=r")
- (unspec [(match_operand 1 "register_operand" "r")
- (match_operand 2 "aarch64_tls_le_symref" "S")]
- UNSPEC_TLSLE))]
+(define_insn "tlsie_tiny_<mode>"
+ [(set (match_operand:PTR 0 "register_operand" "=&r")
+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")
+ (match_operand:PTR 2 "register_operand" "r")]
+ UNSPEC_GOTTINYTLS))]
""
-{
- machine_mode mode = GET_MODE (operands[0]);
- emit_insn ((mode == DImode
- ? gen_tlsle_di
- : gen_tlsle_si) (operands[0], operands[1], operands[2]));
- DONE;
-})
+ "ldr\\t%<w>0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
-(define_insn "tlsle_<mode>"
+(define_insn "tlsie_tiny_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (zero_extend:DI
+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")
+ (match_operand:DI 2 "register_operand" "r")
+ ]
+ UNSPEC_GOTTINYTLS)))]
+ ""
+ "ldr\\t%w0, %L1\;add\\t%<w>0, %<w>0, %<w>2"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
+
+(define_insn "tlsle12_<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
- (unspec:P [(match_operand:P 1 "register_operand" "r")
- (match_operand 2 "aarch64_tls_le_symref" "S")]
- UNSPEC_TLSLE))]
+ (unspec:P [(match_operand:P 1 "register_operand" "r")
+ (match_operand 2 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE12))]
""
- "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+ "add\\t%<w>0, %<w>1, #%L2";
[(set_attr "type" "alu_sreg")
+ (set_attr "length" "4")]
+)
+
+(define_insn "tlsle24_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "register_operand" "r")
+ (match_operand 2 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE24))]
+ ""
+ "add\\t%<w>0, %<w>1, #%G2, lsl #12\;add\\t%<w>0, %<w>0, #%L2"
+ [(set_attr "type" "multiple")
(set_attr "length" "8")]
)
+(define_insn "tlsle32_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE32))]
+ ""
+ "movz\\t%<w>0, #:tprel_g1:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "8")]
+)
+
+(define_insn "tlsle48_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand 1 "aarch64_tls_le_symref" "S")]
+ UNSPEC_TLSLE48))]
+ ""
+ "movz\\t%<w>0, #:tprel_g2:%1\;movk\\t%<w>0, #:tprel_g1_nc:%1\;movk\\t%<w>0, #:tprel_g0_nc:%1"
+ [(set_attr "type" "multiple")
+ (set_attr "length" "12")]
+)
+
(define_insn "tlsdesc_small_<mode>"
[(set (reg:PTR R0_REGNUM)
(unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 37c2c509fe2..8642bdb74f3 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -96,6 +96,25 @@ mtls-dialect=
Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) Save
Specify TLS dialect
+mtls-size=
+Target RejectNegative Joined Var(aarch64_tls_size) Enum(aarch64_tls_size)
+Specifies bit size of immediate TLS offsets. Valid values are 12, 24, 32, 48.
+
+Enum
+Name(aarch64_tls_size) Type(int)
+
+EnumValue
+Enum(aarch64_tls_size) String(12) Value(12)
+
+EnumValue
+Enum(aarch64_tls_size) String(24) Value(24)
+
+EnumValue
+Enum(aarch64_tls_size) String(32) Value(32)
+
+EnumValue
+Enum(aarch64_tls_size) String(48) Value(48)
+
march=
Target RejectNegative ToLower Joined Var(aarch64_arch_string)
-march=ARCH Use features of architecture ARCH
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b8a45d1d6ed..475aa6e6d37 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -817,9 +817,15 @@
;; Emit cbz/cbnz depending on comparison type.
(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
+;; Emit inverted cbz/cbnz depending on comparison type.
+(define_code_attr inv_cb [(eq "cbnz") (ne "cbz") (lt "cbz") (ge "cbnz")])
+
;; Emit tbz/tbnz depending on comparison type.
(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])
+;; Emit inverted tbz/tbnz depending on comparison type.
+(define_code_attr inv_tb [(eq "tbnz") (ne "tbz") (lt "tbz") (ge "tbnz")])
+
;; Max/min attributes.
(define_code_attr maxmin [(smax "max")
(smin "min")
diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md
index 914daf33a5a..cf9636862f2 100644
--- a/gcc/config/aarch64/thunderx.md
+++ b/gcc/config/aarch64/thunderx.md
@@ -39,7 +39,7 @@
(define_insn_reservation "thunderx_shift" 1
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
+ (eq_attr "type" "bfm,extend,shift_imm,shift_reg,rbit,rev"))
"thunderx_pipe0 | thunderx_pipe1")
@@ -66,12 +66,18 @@
(eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
"thunderx_pipe1 + thunderx_mult")
-;; Multiply high instructions take an extra cycle and cause the muliply unit to
-;; be busy for an extra cycle.
+;; crcb,crch,crcw is 4 cycles and can only happen on pipe 1
-;(define_insn_reservation "thunderx_mul_high" 5
+(define_insn_reservation "thunderx_crc32" 4
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crc"))
+ "thunderx_pipe1 + thunderx_mult")
+
+;; crcx is 5 cycles and only happen on pipe 1
+;(define_insn_reservation "thunderx_crc64" 5
; (and (eq_attr "tune" "thunderx")
-; (eq_attr "type" "smull,umull"))
+; (eq_attr "type" "crc")
+; (eq_attr "mode" "DI"))
; "thunderx_pipe1 + thunderx_mult")
(define_insn_reservation "thunderx_div32" 22
@@ -97,6 +103,11 @@
(eq_attr "type" "store2"))
"thunderx_pipe0 + thunderx_pipe1")
+;; Prefetch are single issued
+;(define_insn_reservation "thunderx_prefetch" 1
+; (and (eq_attr "tune" "thunderx")
+; (eq_attr "type" "prefetch"))
+; "thunderx_pipe0 + thunderx_pipe1")
;; loads (and load pairs) from L1 take 3 cycles in pipe 0
(define_insn_reservation "thunderx_load" 3
@@ -121,10 +132,21 @@
(eq_attr "type" "fconsts,fconstd"))
"thunderx_pipe1")
-;; Moves between fp are 2 cycles including min/max/select/abs/neg
+;; Moves between fp are 2 cycles including min/max
(define_insn_reservation "thunderx_fmov" 2
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
+ (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
+ "thunderx_pipe1")
+
+;; ABS, and NEG are 1 cycle
+(define_insn_reservation "thunderx_fabs" 1
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fcsel" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "fcsel"))
"thunderx_pipe1")
(define_insn_reservation "thunderx_fmovgpr" 2
@@ -132,6 +154,11 @@
(eq_attr "type" "f_mrc, f_mcr"))
"thunderx_pipe1")
+(define_insn_reservation "thunderx_fcmp" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "thunderx_pipe1")
+
(define_insn_reservation "thunderx_fmul" 6
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
@@ -152,21 +179,21 @@
(eq_attr "type" "fsqrts"))
"thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
-(define_insn_reservation "thunderx_fsqrtd" 28
+(define_insn_reservation "thunderx_fsqrtd" 31
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "fsqrtd"))
- "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
;; The rounding conversion inside fp is 4 cycles
(define_insn_reservation "thunderx_frint" 4
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_rints,f_rintd"))
+ (eq_attr "type" "f_cvt,f_rints,f_rintd"))
"thunderx_pipe1")
;; Float to integer with a move from int to/from float is 6 cycles
(define_insn_reservation "thunderx_f_cvt" 6
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+ (eq_attr "type" "f_cvtf2i,f_cvti2f"))
"thunderx_pipe1")
;; FP/SIMD load/stores happen in pipe 0
@@ -184,9 +211,12 @@
"thunderx_pipe0+thunderx_pipe1")
;; FP/SIMD Stores takes one cycle in pipe 0
+;; ST1 with one registers either multiple structures or single structure is
+;; also one cycle.
(define_insn_reservation "thunderx_simd_fp_store" 1
(and (eq_attr "tune" "thunderx")
- (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
+ (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
+ neon_store1_one_lane, neon_store1_one_lane_q"))
"thunderx_pipe0")
;; 64bit neon store pairs are single issue for one cycle
@@ -201,24 +231,38 @@
(eq_attr "type" "neon_store1_2reg_q"))
"(thunderx_pipe0 + thunderx_pipe1)*2")
+;; LD1R/LD1 (with a single struct) takes 6 cycles and issued in pipe0
+(define_insn_reservation "thunderx_neon_ld1" 6
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_load1_all_lanes"))
+ "thunderx_pipe0")
;; SIMD/NEON (q forms take an extra cycle)
+;; SIMD For ThunderX is 64bit wide,
-;; Thunder simd move instruction types - 2/3 cycles
+;; ThunderX simd move instruction types - 2/3 cycles
+;; ThunderX dup, ins is the same
+;; ThunderX SIMD fabs/fneg instruction types
(define_insn_reservation "thunderx_neon_move" 2
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
- neon_fp_compare_d, neon_move"))
+ neon_fp_compare_d, neon_move, neon_dup, \
+ neon_ins, neon_from_gp, neon_to_gp, \
+ neon_abs, neon_neg, \
+ neon_fp_neg_s, neon_fp_abs_s"))
"thunderx_pipe1 + thunderx_simd")
(define_insn_reservation "thunderx_neon_move_q" 3
(and (eq_attr "tune" "thunderx")
(eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
- neon_fp_compare_d_q, neon_move_q"))
+ neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
+ neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
+ neon_abs_q, neon_neg_q, \
+ neon_fp_neg_s_q, neon_fp_neg_d_q, \
+ neon_fp_abs_s_q, neon_fp_abs_d_q"))
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
-
-;; Thunder simd simple/add instruction types - 4/5 cycles
+;; ThunderX simd simple/add instruction types - 4/5 cycles
(define_insn_reservation "thunderx_neon_add" 4
(and (eq_attr "tune" "thunderx")
@@ -227,7 +271,9 @@
neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
- neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
+ neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
+ neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
+ neon_fp_reduc_minmax_s"))
"thunderx_pipe1 + thunderx_simd")
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
@@ -240,13 +286,74 @@
neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
- neon_add_long, neon_sub_long"))
+ neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
+ neon_arith_acc_q, neon_rev_q, \
+ neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
+;; Multiplies (float and integer) and shifts and permutes (except for TBL) and float conversions
+;; are 6/7 cycles
+(define_insn_reservation "thunderx_neon_mult" 6
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
+ neon_mla_b, neon_mla_h, neon_mla_s, \
+ neon_mla_h_scalar, neon_mla_s_scalar, \
+ neon_ext, neon_shift_imm, neon_permute, \
+ neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
+ neon_sat_shift_reg, neon_shift_acc, \
+ neon_mul_b, neon_mul_h, neon_mul_s, \
+ neon_mul_h_scalar, neon_mul_s_scalar, \
+ neon_fp_mul_s_scalar, \
+ neon_fp_mla_s_scalar"))
+ "thunderx_pipe1 + thunderx_simd")
+
+(define_insn_reservation "thunderx_neon_mult_q" 7
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
+ neon_ext_q, neon_shift_imm_q, neon_permute_q, \
+ neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
+ neon_sat_shift_reg_q, neon_shift_acc_q, \
+ neon_shift_imm_long, \
+ neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
+ neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
+ neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
+ neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
+ neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; AES[ED] is 5 cycles
+(define_insn_reservation "thunderx_crypto_aese" 5
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crypto_aese"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
-;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
-(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
-(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
+;; AES{,I}MC is 3 cycles
+(define_insn_reservation "thunderx_crypto_aesmc" 3
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "crypto_aesmc"))
+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes upper halve in the last cycle
+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
+
+;; 64bit TBL is emulated and takes 160 cycles
+(define_insn_reservation "thunderx_tbl" 160
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_tbl1"))
+ "(thunderx_pipe1+thunderx_pipe0)*160")
+
+;; 128bit TBL is emulated and takes 320 cycles
+(define_insn_reservation "thunderx_tblq" 320
+ (and (eq_attr "tune" "thunderx")
+ (eq_attr "type" "neon_tbl1_q"))
+ "(thunderx_pipe1+thunderx_pipe0)*320")
;; Assume both pipes are needed for unknown and multiple-instruction
;; patterns.
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index ca07cc7052b..32bb36eec33 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -3110,7 +3110,7 @@ alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
}
tmp = gen_rtx_MEM (QImode, func);
- tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
+ tmp = emit_call_insn (gen_call_value (reg, tmp, const0_rtx,
const0_rtx, const0_rtx));
CALL_INSN_FUNCTION_USAGE (tmp) = usage;
RTL_CONST_CALL_P (tmp) = 1;
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 7626d3f0233..5068f60ad6c 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3646,7 +3646,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index 3dafaa5fbc8..ddf6c3c330f 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -23,39 +23,40 @@
The NAME is the name of the architecture, represented as a string
constant. The CORE is the identifier for a core representative of
- this architecture. ARCH is the architecture revision. FLAGS are
- the flags implied by the architecture.
+ this architecture. ARCH is the architecture revision. FLAGS is
+ the set of feature flags implied by the architecture.
genopt.sh assumes no whitespace up to the first "," in each entry. */
-ARM_ARCH("armv2", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv2a", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2)
-ARM_ARCH("armv3", arm6, 3, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3)
-ARM_ARCH("armv3m", arm7m, 3M, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M)
-ARM_ARCH("armv4", arm7tdmi, 4, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4)
+ARM_ARCH("armv2", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv2a", arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2))
+ARM_ARCH("armv3", arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3))
+ARM_ARCH("armv3m", arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M))
+ARM_ARCH("armv4", arm7tdmi, 4, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4))
/* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
implementations that support it, so we will leave it out for now. */
-ARM_ARCH("armv4t", arm7tdmi, 4T, FL_CO_PROC | FL_FOR_ARCH4T)
-ARM_ARCH("armv5", arm10tdmi, 5, FL_CO_PROC | FL_FOR_ARCH5)
-ARM_ARCH("armv5t", arm10tdmi, 5T, FL_CO_PROC | FL_FOR_ARCH5T)
-ARM_ARCH("armv5e", arm1026ejs, 5E, FL_CO_PROC | FL_FOR_ARCH5E)
-ARM_ARCH("armv5te", arm1026ejs, 5TE, FL_CO_PROC | FL_FOR_ARCH5TE)
-ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6)
-ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J)
-ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K)
-ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z)
-ARM_ARCH("armv6kz", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ)
-ARM_ARCH("armv6zk", arm1176jzs, 6KZ, FL_CO_PROC | FL_FOR_ARCH6KZ)
-ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2)
-ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M)
-ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M)
-ARM_ARCH("armv7", cortexa8, 7, FL_CO_PROC | FL_FOR_ARCH7)
-ARM_ARCH("armv7-a", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7A)
-ARM_ARCH("armv7ve", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7VE)
-ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R)
-ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M)
-ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM)
-ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A)
-ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)
-ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)
-ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
+ARM_ARCH("armv4t", arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T))
+ARM_ARCH("armv5", arm10tdmi, 5, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5))
+ARM_ARCH("armv5t", arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5T))
+ARM_ARCH("armv5e", arm1026ejs, 5E, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5E))
+ARM_ARCH("armv5te", arm1026ejs, 5TE, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH5TE))
+ARM_ARCH("armv6", arm1136js, 6, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6))
+ARM_ARCH("armv6j", arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6J))
+ARM_ARCH("armv6k", mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6K))
+ARM_ARCH("armv6z", arm1176jzs, 6Z, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6Z))
+ARM_ARCH("armv6kz", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ))
+ARM_ARCH("armv6zk", arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6KZ))
+ARM_ARCH("armv6t2", arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH6T2))
+ARM_ARCH("armv6-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M))
+ARM_ARCH("armv6s-m", cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_FOR_ARCH6M))
+ARM_ARCH("armv7", cortexa8, 7, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7))
+ARM_ARCH("armv7-a", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7A))
+ARM_ARCH("armv7ve", cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7VE))
+ARM_ARCH("armv7-r", cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7R))
+ARM_ARCH("armv7-m", cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7M))
+ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH7EM))
+ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A))
+ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A))
+ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT))
+ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2))
+
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 030d8d19441..4391f17c655 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -769,13 +769,6 @@ arm_init_simd_builtin_types (void)
int nelts = sizeof (arm_simd_types) / sizeof (arm_simd_types[0]);
tree tdecl;
- /* Initialize the HFmode scalar type. */
- arm_simd_floatHF_type_node = make_node (REAL_TYPE);
- TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
- layout_type (arm_simd_floatHF_type_node);
- (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
- "__builtin_neon_hf");
-
/* Poly types are a world of their own. In order to maintain legacy
ABI, they get initialized using the old interface, and don't get
an entry in our mangling table, consequently, they get default
@@ -823,6 +816,8 @@ arm_init_simd_builtin_types (void)
mangling. */
/* Continue with standard types. */
+ /* The __builtin_simd{64,128}_float16 types are kept private unless
+ we have a scalar __fp16 type. */
arm_simd_types[Float16x4_t].eltype = arm_simd_floatHF_type_node;
arm_simd_types[Float32x2_t].eltype = float_type_node;
arm_simd_types[Float32x4_t].eltype = float_type_node;
@@ -1106,10 +1101,11 @@ arm_init_neon_builtins (void)
#undef NUM_DREG_TYPES
#undef NUM_QREG_TYPES
-#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
+#define def_mbuiltin(FLAGS, NAME, TYPE, CODE) \
do \
{ \
- if ((MASK) & insn_flags) \
+ const arm_feature_set flags = FLAGS; \
+ if (ARM_FSET_CPU_SUBSET (flags, insn_flags)) \
{ \
tree bdecl; \
bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
@@ -1121,7 +1117,7 @@ arm_init_neon_builtins (void)
struct builtin_description
{
- const unsigned int mask;
+ const arm_feature_set features;
const enum insn_code icode;
const char * const name;
const enum arm_builtins code;
@@ -1132,11 +1128,13 @@ struct builtin_description
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, \
+ "__builtin_arm_" string, \
ARM_BUILTIN_##builtin, UNKNOWN, 0 },
#define IWMMXT2_BUILTIN(code, string, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), CODE_FOR_##code, \
+ "__builtin_arm_" string, \
ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
@@ -1219,10 +1217,12 @@ static const struct builtin_description bdesc_2arg[] =
IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
#define IWMMXT_BUILTIN2(code, builtin) \
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+ { ARM_FSET_MAKE_CPU1 (FL_IWMMXT), CODE_FOR_##code, NULL, \
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
#define IWMMXT2_BUILTIN2(code, builtin) \
- { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+ { ARM_FSET_MAKE_CPU2 (FL_IWMMXT2), CODE_FOR_##code, NULL, \
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
@@ -1237,7 +1237,7 @@ static const struct builtin_description bdesc_2arg[] =
#define FP_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
+ {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
UNKNOWN, 0},
FP_BUILTIN (get_fpscr, GET_FPSCR)
@@ -1245,8 +1245,8 @@ static const struct builtin_description bdesc_2arg[] =
#undef FP_BUILTIN
#define CRC32_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
- UNKNOWN, 0},
+ {ARM_FSET_EMPTY, CODE_FOR_##L, "__builtin_arm_"#L, \
+ ARM_BUILTIN_##U, UNKNOWN, 0},
CRC32_BUILTIN (crc32b, CRC32B)
CRC32_BUILTIN (crc32h, CRC32H)
CRC32_BUILTIN (crc32w, CRC32W)
@@ -1256,9 +1256,9 @@ static const struct builtin_description bdesc_2arg[] =
#undef CRC32_BUILTIN
-#define CRYPTO_BUILTIN(L, U) \
- {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
- UNKNOWN, 0},
+#define CRYPTO_BUILTIN(L, U) \
+ {ARM_FSET_EMPTY, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, \
+ ARM_BUILTIN_CRYPTO_##U, UNKNOWN, 0},
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
@@ -1514,7 +1514,9 @@ arm_init_iwmmxt_builtins (void)
machine_mode mode;
tree type;
- if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
+ if (d->name == 0 ||
+ !(ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT) ||
+ ARM_FSET_HAS_CPU1 (d->features, FL_IWMMXT2)))
continue;
mode = insn_data[d->icode].operand[1].mode;
@@ -1538,17 +1540,17 @@ arm_init_iwmmxt_builtins (void)
gcc_unreachable ();
}
- def_mbuiltin (d->mask, d->name, type, d->code);
+ def_mbuiltin (d->features, d->name, type, d->code);
}
/* Add the remaining MMX insns with somewhat more complicated types. */
#define iwmmx_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
+ def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT), "__builtin_arm_" NAME, \
+ (TYPE), ARM_BUILTIN_ ## CODE)
#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
- ARM_BUILTIN_ ## CODE)
+ def_mbuiltin (ARM_FSET_MAKE_CPU1 (FL_IWMMXT2), "__builtin_arm_" NAME, \
+ (TYPE), ARM_BUILTIN_ ## CODE)
iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
@@ -1702,10 +1704,12 @@ arm_init_iwmmxt_builtins (void)
static void
arm_init_fp16_builtins (void)
{
- tree fp16_type = make_node (REAL_TYPE);
- TYPE_PRECISION (fp16_type) = 16;
- layout_type (fp16_type);
- (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+ arm_simd_floatHF_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (arm_simd_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
+ layout_type (arm_simd_floatHF_type_node);
+ if (arm_fp16_format)
+ (*lang_hooks.types.register_builtin_type) (arm_simd_floatHF_type_node,
+ "__fp16");
}
static void
@@ -1750,12 +1754,13 @@ arm_init_builtins (void)
if (TARGET_REALLY_IWMMXT)
arm_init_iwmmxt_builtins ();
+ /* This creates the arm_simd_floatHF_type_node so must come before
+ arm_init_neon_builtins which uses it. */
+ arm_init_fp16_builtins ();
+
if (TARGET_NEON)
arm_init_neon_builtins ();
- if (arm_fp16_format)
- arm_init_fp16_builtins ();
-
if (TARGET_CRC32)
arm_init_crc32_builtins ();
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 9d47fcfbcd7..4c35200b3f8 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -33,7 +33,7 @@
The TUNE_IDENT is the name of the core for which scheduling decisions
should be made, represented as an identifier.
ARCH is the architecture revision implemented by the chip.
- FLAGS are the bitwise-or of the traits that apply to that core.
+ FLAGS is the set of feature flags of that core.
This need not include flags implied by the architecture.
COSTS is the name of the rtx_costs routine to use.
@@ -43,134 +43,134 @@
Some tools assume no whitespace up to the first "," in each entry. */
/* V2/V2A Architecture Processors */
-ARM_CORE("arm2", arm2, arm2, 2, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm250", arm250, arm250, 2, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm3", arm3, arm3, 2, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm2", arm2, arm2, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
+ARM_CORE("arm250", arm250, arm250, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
+ARM_CORE("arm3", arm3, arm3, 2, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2), slowmul)
/* V3 Architecture Processors */
-ARM_CORE("arm6", arm6, arm6, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm60", arm60, arm60, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm600", arm600, arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm610", arm610, arm610, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm620", arm620, arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7", arm7, arm7, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm7d", arm7d, arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm7di", arm7di, arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm70", arm70, arm70, 3, FL_CO_PROC | FL_MODE26, slowmul)
-ARM_CORE("arm700", arm700, arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm700i", arm700i, arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm710", arm710, arm710, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm720", arm720, arm720, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm710c", arm710c, arm710c, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7100", arm7100, arm7100, 3, FL_MODE26 | FL_WBUF, slowmul)
-ARM_CORE("arm7500", arm7500, arm7500, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm6", arm6, arm6, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm60", arm60, arm60, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm600", arm600, arm600, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm610", arm610, arm610, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm620", arm620, arm620, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7", arm7, arm7, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7d", arm7d, arm7d, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7di", arm7di, arm7di, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm70", arm70, arm70, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm700", arm700, arm700, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm700i", arm700i, arm700i, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm710", arm710, arm710, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm720", arm720, arm720, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm710c", arm710c, arm710c, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7100", arm7100, arm7100, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
+ARM_CORE("arm7500", arm7500, arm7500, 3, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
/* Doesn't have an external co-proc, but does have embedded fpa. */
-ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_WBUF | FL_FOR_ARCH3), slowmul)
/* V3M Architecture Processors */
/* arm7m doesn't exist on its own, but only with D, ("and", and I), but
those don't alter the code, so arm7m is sometimes used. */
-ARM_CORE("arm7m", arm7m, arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul)
-ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul)
-ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul)
+ARM_CORE("arm7m", arm7m, arm7m, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
+ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
+ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M), fastmul)
/* V4 Architecture Processors */
-ARM_CORE("arm8", arm8, arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("arm810", arm810, arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("strongarm", strongarm, strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm110", strongarm110, strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
-ARM_CORE("fa526", fa526, fa526, 4, FL_LDSCHED, fastmul)
-ARM_CORE("fa626", fa626, fa626, 4, FL_LDSCHED, fastmul)
+ARM_CORE("arm8", arm8, arm8, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("arm810", arm810, arm810, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("strongarm", strongarm, strongarm, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm110", strongarm110, strongarm110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, ARM_FSET_MAKE_CPU1 (FL_MODE26 | FL_LDSCHED | FL_STRONG | FL_FOR_ARCH4), strongarm)
+ARM_CORE("fa526", fa526, fa526, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul)
+ARM_CORE("fa626", fa626, fa626, 4, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4), fastmul)
/* V4T Architecture Processors */
-ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, FL_CO_PROC, fastmul)
-ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, FL_CO_PROC, fastmul)
-ARM_CORE("arm710t", arm710t, arm710t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm720t", arm720t, arm720t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm740t", arm740t, arm740t, 4T, FL_WBUF, fastmul)
-ARM_CORE("arm9", arm9, arm9, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm920", arm920, arm920, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm920t", arm920t, arm920t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm922t", arm922t, arm922t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("arm940t", arm940t, arm940t, 4T, FL_LDSCHED, fastmul)
-ARM_CORE("ep9312", ep9312, ep9312, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm710t", arm710t, arm710t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm720t", arm720t, arm720t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm740t", arm740t, arm740t, 4T, ARM_FSET_MAKE_CPU1 (FL_WBUF | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm9", arm9, arm9, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm920", arm920, arm920, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm920t", arm920t, arm920t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm922t", arm922t, arm922t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("arm940t", arm940t, arm940t, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
+ARM_CORE("ep9312", ep9312, ep9312, 4T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH4T), fastmul)
/* V5T Architecture Processors */
-ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, FL_LDSCHED, fastmul)
-ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, FL_LDSCHED, fastmul)
+ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul)
+ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5T), fastmul)
/* V5TE Architecture Processors */
-ARM_CORE("arm9e", arm9e, arm9e, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("arm10e", arm10e, arm10e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, FL_LDSCHED, fastmul)
-ARM_CORE("xscale", xscale, xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
-ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
-ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale)
-ARM_CORE("fa606te", fa606te, fa606te, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fa626te", fa626te, fa626te, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fmp626", fmp626, fmp626, 5TE, FL_LDSCHED, 9e)
-ARM_CORE("fa726te", fa726te, fa726te, 5TE, FL_LDSCHED, fa726te)
+ARM_CORE("arm9e", arm9e, arm9e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("arm10e", arm10e, arm10e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fastmul)
+ARM_CORE("xscale", xscale, xscale, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2 | FL_FOR_ARCH5TE), xscale)
+ARM_CORE("fa606te", fa606te, fa606te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fa626te", fa626te, fa626te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fmp626", fmp626, fmp626, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), 9e)
+ARM_CORE("fa726te", fa726te, fa726te, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TE), fa726te)
/* V5TEJ Architecture Processors */
-ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, FL_LDSCHED, 9e)
-ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e)
+ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e)
+ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH5TEJ), 9e)
/* V6 Architecture Processors */
-ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e)
-ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, FL_LDSCHED, 9e)
-ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e)
-ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2)
-ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6J), 9e)
+ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6J), 9e)
+ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6KZ), 9e)
+ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6KZ, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6KZ), 9e)
+ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6K), 9e)
+ARM_CORE("mpcore", mpcore, mpcore, 6K, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6K), 9e)
+ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6T2), v6t2)
+ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_VFPV2 | FL_FOR_ARCH6T2), v6t2)
/* V6M Architecture Processors */
-ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, FL_LDSCHED, v6m)
-ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, FL_LDSCHED, v6m)
-ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, FL_LDSCHED, v6m)
+ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH6M), v6m)
/* V6M Architecture Processors for small-multiply implementations. */
-ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
-ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
-ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, FL_LDSCHED | FL_SMALLMUL, v6m)
+ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0, 6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
+ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_SMALLMUL | FL_FOR_ARCH6M), v6m)
/* V7 Architecture Processors */
-ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex)
-ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5)
-ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7)
-ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)
-ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
-ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
-ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
-ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, FL_LDSCHED, cortex)
-ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex)
-ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
-ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED | FL_NO_VOLATILE_CE, cortex_m7)
-ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
-ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
-ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, marvell_pj4)
+ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex)
+ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a5)
+ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a7)
+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a8)
+ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), cortex_a9)
+ARM_CORE("cortex-a12", cortexa12, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
+ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15)
+ARM_CORE("cortex-a17", cortexa17, cortexa17, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
+ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARM_DIV | FL_FOR_ARCH7R), cortex)
+ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_NO_VOLATILE_CE | FL_FOR_ARCH7EM), cortex_m7)
+ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7EM), v7m)
+ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7M), v7m)
+ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), marvell_pj4)
/* V7 big.LITTLE implementations */
-ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
-ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
+ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a15)
+ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
/* V8 Architecture Processors */
-ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53)
-ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("xgene1", xgene1, xgene1, 8A, FL_LDSCHED, xgene1)
+ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53)
+ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("exynos-m1", exynosm1, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("xgene1", xgene1, xgene1, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8A), xgene1)
/* V8 big.LITTLE implementations */
-ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
-ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index cef9eec277d..8df312f3c67 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -319,6 +319,7 @@ extern int vfp3_const_double_for_bits (rtx);
extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
rtx);
+extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
#endif /* RTX_CODE */
@@ -346,6 +347,8 @@ extern bool arm_is_constant_pool_ref (rtx);
/* Flags used to identify the presence of processor capabilities. */
/* Bit values used to identify processor capabilities. */
+#define FL_NONE (0) /* No flags. */
+#define FL_ANY (0xffffffff) /* All flags. */
#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
#define FL_ARCH3M (1 << 1) /* Extended multiply */
#define FL_MODE26 (1 << 2) /* 26-bit mode support */
@@ -413,13 +416,116 @@ extern bool arm_is_constant_pool_ref (rtx);
#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
+/* There are too many feature bits to fit in a single word so the set of cpu and
+ fpu capabilities is a structure. A feature set is created and manipulated
+ with the ARM_FSET macros. */
+
+typedef struct
+{
+ unsigned long cpu[2];
+} arm_feature_set;
+
+
+/* Initialize a feature set. */
+
+#define ARM_FSET_MAKE(CPU1,CPU2) { { (CPU1), (CPU2) } }
+
+#define ARM_FSET_MAKE_CPU1(CPU1) ARM_FSET_MAKE ((CPU1), (FL_NONE))
+#define ARM_FSET_MAKE_CPU2(CPU2) ARM_FSET_MAKE ((FL_NONE), (CPU2))
+
+/* Accessors. */
+
+#define ARM_FSET_CPU1(S) ((S).cpu[0])
+#define ARM_FSET_CPU2(S) ((S).cpu[1])
+
+/* Useful combinations. */
+
+#define ARM_FSET_EMPTY ARM_FSET_MAKE (FL_NONE, FL_NONE)
+#define ARM_FSET_ANY ARM_FSET_MAKE (FL_ANY, FL_ANY)
+
+/* Tests for a specific CPU feature. */
+
+#define ARM_FSET_HAS_CPU1(A, F) \
+ (((A).cpu[0] & ((unsigned long)(F))) == ((unsigned long)(F)))
+#define ARM_FSET_HAS_CPU2(A, F) \
+ (((A).cpu[1] & ((unsigned long)(F))) == ((unsigned long)(F)))
+#define ARM_FSET_HAS_CPU(A, F1, F2) \
+ (ARM_FSET_HAS_CPU1 ((A), (F1)) && ARM_FSET_HAS_CPU2 ((A), (F2)))
+
+/* Add a feature to a feature set. */
+
+#define ARM_FSET_ADD_CPU1(DST, F) \
+ do { \
+ (DST).cpu[0] |= (F); \
+ } while (0)
+
+#define ARM_FSET_ADD_CPU2(DST, F) \
+ do { \
+ (DST).cpu[1] |= (F); \
+ } while (0)
+
+/* Remove a feature from a feature set. */
+
+#define ARM_FSET_DEL_CPU1(DST, F) \
+ do { \
+ (DST).cpu[0] &= ~(F); \
+ } while (0)
+
+#define ARM_FSET_DEL_CPU2(DST, F) \
+ do { \
+ (DST).cpu[1] &= ~(F); \
+ } while (0)
+
+/* Union of feature sets. */
+
+#define ARM_FSET_UNION(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; \
+ } while (0)
+
+/* Intersection of feature sets. */
+
+#define ARM_FSET_INTER(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; \
+ } while (0)
+
+/* Exclusive disjunction. */
+
+#define ARM_FSET_XOR(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] ^ (F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] ^ (F2).cpu[1]; \
+ } while (0)
+
+/* Difference of feature sets: F1 excluding the elements of F2. */
+
+#define ARM_FSET_EXCLUDE(DST,F1,F2) \
+ do { \
+ (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \
+ (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; \
+ } while (0)
+
+/* Test for an empty feature set. */
+
+#define ARM_FSET_IS_EMPTY(A) \
+ (!((A).cpu[0]) && !((A).cpu[1]))
+
+/* Tests whether the cpu features of A are a subset of B. */
+
+#define ARM_FSET_CPU_SUBSET(A,B) \
+ ((((A).cpu[0] & (B).cpu[0]) == (A).cpu[0]) \
+ && (((A).cpu[1] & (B).cpu[1]) == (A).cpu[1]))
+
/* The bits in this mask specify which
instructions we are allowed to generate. */
-extern unsigned long insn_flags;
+extern arm_feature_set insn_flags;
/* The bits in this mask specify which instruction scheduling options should
be used. */
-extern unsigned long tune_flags;
+extern arm_feature_set tune_flags;
/* Nonzero if this chip supports the ARM Architecture 3M extensions. */
extern int arm_arch3m;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 57702cbc8de..fa4e083adfe 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -97,6 +97,7 @@ static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
+static unsigned feature_count (const arm_feature_set*);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
@@ -767,11 +768,11 @@ static int thumb_call_reg_needed;
/* The bits in this mask specify which
instructions we are allowed to generate. */
-unsigned long insn_flags = 0;
+arm_feature_set insn_flags = ARM_FSET_EMPTY;
/* The bits in this mask specify which instruction scheduling options should
be used. */
-unsigned long tune_flags = 0;
+arm_feature_set tune_flags = ARM_FSET_EMPTY;
/* The highest ARM architecture version supported by the
target. */
@@ -927,7 +928,7 @@ struct processors
enum processor_type core;
const char *arch;
enum base_architecture base_arch;
- const unsigned long flags;
+ const arm_feature_set flags;
const struct tune_params *const tune;
};
@@ -2196,10 +2197,10 @@ static const struct processors all_cores[] =
/* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
{NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
- FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
+ FLAGS, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
- {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
+ {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
static const struct processors all_architectures[] =
@@ -2212,7 +2213,7 @@ static const struct processors all_architectures[] =
{NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
- {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
+ {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
};
@@ -2278,6 +2279,14 @@ bit_count (unsigned long value)
return count;
}
+/* Return the number of features in feature-set SET. */
+static unsigned
+feature_count (const arm_feature_set * set)
+{
+ return (bit_count (ARM_FSET_CPU1 (*set))
+ + bit_count (ARM_FSET_CPU2 (*set)));
+}
+
typedef struct
{
machine_mode mode;
@@ -2703,7 +2712,7 @@ arm_option_check_internal (struct gcc_options *opts)
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
- if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
+ if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
error ("target CPU does not support ARM mode");
/* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
@@ -2803,7 +2812,8 @@ static void
arm_option_override_internal (struct gcc_options *opts,
struct gcc_options *opts_set)
{
- if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
+ if (TARGET_THUMB_P (opts->x_target_flags)
+ && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
{
warning (0, "target CPU does not support THUMB instructions");
opts->x_target_flags &= ~MASK_THUMB;
@@ -2890,8 +2900,13 @@ arm_option_override (void)
{
if (arm_selected_cpu)
{
+ const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
+ arm_feature_set selected_flags;
+ ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
+ arm_selected_arch->flags);
+ ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
/* Check for conflict between mcpu and march. */
- if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
+ if (!ARM_FSET_IS_EMPTY (selected_flags))
{
warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
arm_selected_cpu->name, arm_selected_arch->name);
@@ -2915,7 +2930,7 @@ arm_option_override (void)
if (!arm_selected_cpu)
{
const struct processors * sel;
- unsigned int sought;
+ arm_feature_set sought = ARM_FSET_EMPTY;;
arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
if (!arm_selected_cpu->name)
@@ -2935,26 +2950,27 @@ arm_option_override (void)
/* Now check to see if the user has specified some command line
switch that require certain abilities from the cpu. */
- sought = 0;
if (TARGET_INTERWORK || TARGET_THUMB)
{
- sought |= (FL_THUMB | FL_MODE32);
+ ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
+ ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
/* There are no ARM processors that support both APCS-26 and
interworking. Therefore we force FL_MODE26 to be removed
from insn_flags here (if it was set), so that the search
below will always be able to find a compatible processor. */
- insn_flags &= ~FL_MODE26;
+ ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
}
- if (sought != 0 && ((sought & insn_flags) != sought))
+ if (!ARM_FSET_IS_EMPTY (sought)
+ && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
{
/* Try to locate a CPU type that supports all of the abilities
of the default CPU, plus the extra abilities requested by
the user. */
for (sel = all_cores; sel->name != NULL; sel++)
- if ((sel->flags & sought) == (sought | insn_flags))
+ if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
break;
if (sel->name == NULL)
@@ -2974,19 +2990,23 @@ arm_option_override (void)
command line options we scan the array again looking
for a best match. */
for (sel = all_cores; sel->name != NULL; sel++)
- if ((sel->flags & sought) == sought)
- {
- unsigned count;
-
- count = bit_count (sel->flags & insn_flags);
-
- if (count >= current_bit_count)
- {
- best_fit = sel;
- current_bit_count = count;
- }
- }
+ {
+ arm_feature_set required = ARM_FSET_EMPTY;
+ ARM_FSET_UNION (required, sought, insn_flags);
+ if (ARM_FSET_CPU_SUBSET (required, sel->flags))
+ {
+ unsigned count;
+ arm_feature_set flags;
+ ARM_FSET_INTER (flags, sel->flags, insn_flags);
+ count = feature_count (&flags);
+ if (count >= current_bit_count)
+ {
+ best_fit = sel;
+ current_bit_count = count;
+ }
+ }
+ }
gcc_assert (best_fit);
sel = best_fit;
}
@@ -3014,7 +3034,8 @@ arm_option_override (void)
/* BPABI targets use linker tricks to allow interworking on cores
without thumb support. */
- if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
+ if (TARGET_INTERWORK
+ && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
{
warning (0, "target CPU does not support interworking" );
target_flags &= ~MASK_INTERWORK;
@@ -3039,34 +3060,34 @@ arm_option_override (void)
warning (0, "passing floating point arguments in fp regs not yet supported");
/* Initialize boolean versions of the flags, for use in the arm.md file. */
- arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
- arm_arch4 = (insn_flags & FL_ARCH4) != 0;
- arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
- arm_arch5 = (insn_flags & FL_ARCH5) != 0;
- arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
- arm_arch6 = (insn_flags & FL_ARCH6) != 0;
- arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
- arm_arch6kz = arm_arch6k && (insn_flags & FL_ARCH6KZ);
- arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
+ arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
+ arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
+ arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
+ arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
+ arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
+ arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
+ arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
+ arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
arm_arch6m = arm_arch6 && !arm_arch_notm;
- arm_arch7 = (insn_flags & FL_ARCH7) != 0;
- arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
- arm_arch8 = (insn_flags & FL_ARCH8) != 0;
- arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
- arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
-
- arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
- arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
- arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
- arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
- arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
- arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
- arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
- arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
- arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
+ arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
+ arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
+ arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
+ arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
+ arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
+
+ arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
+ arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
+ arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
+ arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
+ arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
+ arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
+ arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
+ arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
+ arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
- arm_arch_crc = (insn_flags & FL_CRC32) != 0;
- arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
+ arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
+ arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
/* V5 code we generate is completely interworking capable, so we turn off
TARGET_INTERWORK here to avoid many tests later on. */
@@ -3158,7 +3179,7 @@ arm_option_override (void)
/* For arm2/3 there is no need to do any scheduling if we are doing
software floating-point. */
- if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
+ if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
/* Use the cp15 method if it is available. */
@@ -12607,22 +12628,12 @@ neon_vdup_constant (rtx vals)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- bool all_same = true;
rtx x;
- int i;
if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
return NULL_RTX;
- for (i = 0; i < n_elts; ++i)
- {
- x = XVECEXP (vals, 0, i);
- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
- all_same = false;
- }
-
- if (!all_same)
+ if (!const_vec_duplicate_p (vals, &x))
/* The elements are not all the same. We could handle repeating
patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
{0, C, 0, C, 0, C, 0, C} which can be loaded using
@@ -12633,7 +12644,7 @@ neon_vdup_constant (rtx vals)
single ARM register. This will be cheaper than a vector
load. */
- x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ x = copy_to_mode_reg (inner_mode, x);
return gen_rtx_VEC_DUPLICATE (mode, x);
}
@@ -12809,10 +12820,10 @@ bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
if (lane < low || lane >= high)
{
if (exp)
- error ("%K%s %lld out of range %lld - %lld",
+ error ("%K%s %wd out of range %wd - %wd",
exp, desc, lane, low, high - 1);
else
- error ("%s %lld out of range %lld - %lld", desc, lane, low, high - 1);
+ error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
}
}
@@ -28674,6 +28685,38 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
#undef BRANCH
}
+/* Returns true if the pattern is a valid symbolic address, which is either a
+ symbol_ref or (symbol_ref + addend).
+
+ According to the ARM ELF ABI, the initial addend of REL-type relocations
+ processing MOVW and MOVT instructions is formed by interpreting the 16-bit
+ literal field of the instruction as a 16-bit signed value in the range
+ -32768 <= A < 32768. */
+
+bool
+arm_valid_symbolic_address_p (rtx addr)
+{
+ rtx xop0, xop1 = NULL_RTX;
+ rtx tmp = addr;
+
+ if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
+ return true;
+
+ /* (const (plus: symbol_ref const_int)) */
+ if (GET_CODE (addr) == CONST)
+ tmp = XEXP (addr, 0);
+
+ if (GET_CODE (tmp) == PLUS)
+ {
+ xop0 = XEXP (tmp, 0);
+ xop1 = XEXP (tmp, 1);
+
+ if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
+ return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
+ }
+
+ return false;
+}
/* Returns true if a valid comparison operation and makes
the operands in a form that is valid. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 288bbb9f836..b6c20478f9c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5774,7 +5774,7 @@
[(set (match_operand:SI 0 "nonimmediate_operand" "=r")
(lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0")
(match_operand:SI 2 "general_operand" "i")))]
- "arm_arch_thumb2"
+ "arm_arch_thumb2 && arm_valid_symbolic_address_p (operands[2])"
"movt%?\t%0, #:upper16:%c2"
[(set_attr "predicable" "yes")
(set_attr "predicable_short_it" "no")
@@ -8162,8 +8162,7 @@
size += GET_MODE_SIZE (GET_MODE (src));
}
- emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL,
- const0_rtx));
+ emit_call_insn (gen_call_value (par, operands[0], const0_rtx, NULL));
size = 0;
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index c923e294cda..2b30be61a46 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -41,7 +41,9 @@ typedef __simd64_int8_t int8x8_t;
typedef __simd64_int16_t int16x4_t;
typedef __simd64_int32_t int32x2_t;
typedef __builtin_neon_di int64x1_t;
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
typedef __simd64_float16_t float16x4_t;
+#endif
typedef __simd64_float32_t float32x2_t;
typedef __simd64_poly8_t poly8x8_t;
typedef __simd64_poly16_t poly16x4_t;
@@ -6220,21 +6222,25 @@ vcvtq_u32_f32 (float32x4_t __a)
}
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
-
#endif
+#endif
+
#if ((__ARM_FP & 0x2) != 0)
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
-
#endif
+#endif
+
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
{
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 42935a4ca6d..e24858fe45e 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -21,7 +21,7 @@
;; The following register constraints have been used:
;; - in ARM/Thumb-2 state: t, w, x, y, z
;; - in Thumb state: h, b
-;; - in both states: l, c, k, q, US
+;; - in both states: l, c, k, q, Cs, Ts, US
;; In ARM state, 'l' is an alias for 'r'
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
@@ -67,7 +67,8 @@
(define_constraint "j"
"A constant suitable for a MOVW instruction. (ARM/Thumb-2)"
(and (match_test "TARGET_32BIT && arm_arch_thumb2")
- (ior (match_code "high")
+ (ior (and (match_code "high")
+ (match_test "arm_valid_symbolic_address_p (XEXP (op, 0))"))
(and (match_code "const_int")
(match_test "(ival & 0xffff0000) == 0")))))
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 3c477bc8eee..bec9a8bb788 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -9255,10 +9255,10 @@ avr_pgm_check_var_decl (tree node)
{
if (TYPE_P (node))
error ("%qT uses address space %qs beyond flash of %d KiB",
- node, avr_addrspace[as].name, avr_n_flash);
+ node, avr_addrspace[as].name, 64 * avr_n_flash);
else
error ("%s %q+D uses address space %qs beyond flash of %d KiB",
- reason, node, avr_addrspace[as].name, avr_n_flash);
+ reason, node, avr_addrspace[as].name, 64 * avr_n_flash);
}
else
{
@@ -9305,7 +9305,7 @@ avr_insert_attributes (tree node, tree *attributes)
if (avr_addrspace[as].segment >= avr_n_flash)
{
error ("variable %q+D located in address space %qs beyond flash "
- "of %d KiB", node, avr_addrspace[as].name, avr_n_flash);
+ "of %d KiB", node, avr_addrspace[as].name, 64 * avr_n_flash);
}
else if (!AVR_HAVE_LPM && avr_addrspace[as].pointer_size > 2)
{
diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c
index 8185b59b282..7b3b6efd3db 100644
--- a/gcc/config/cr16/cr16.c
+++ b/gcc/config/cr16/cr16.c
@@ -583,7 +583,7 @@ cr16_function_arg (cumulative_args_t cum_v, machine_mode mode,
/* function_arg () is called with this type just after all the args have
had their registers assigned. The rtx that function_arg returns from
this type is supposed to pass to 'gen_call' but currently it is not
- implemented (see macro GEN_CALL). */
+ implemented. */
if (type == void_type_node)
return NULL_RTX;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 05fa5e10ebf..c69c738caa0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -436,7 +436,7 @@ struct processor_costs iamcu_cost = {
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
- 6, /* MOVE_RATIO */
+ 9, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
@@ -25531,7 +25531,7 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
- emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
@@ -39510,60 +39510,57 @@ rdseed_step:
return target;
case IX86_BUILTIN_SBB32:
- icode = CODE_FOR_subsi3_carry;
+ icode = CODE_FOR_subborrowsi;
mode0 = SImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_SBB64:
- icode = CODE_FOR_subdi3_carry;
+ icode = CODE_FOR_subborrowdi;
mode0 = DImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_ADDCARRYX32:
- icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
+ icode = CODE_FOR_addcarrysi;
mode0 = SImode;
- goto addcarryx;
+ goto handlecarry;
case IX86_BUILTIN_ADDCARRYX64:
- icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
+ icode = CODE_FOR_addcarrydi;
mode0 = DImode;
-addcarryx:
+ handlecarry:
arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
- op0 = gen_reg_rtx (QImode);
-
- /* Generate CF from input operand. */
op1 = expand_normal (arg0);
op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
- emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
- /* Gen ADCX instruction to compute X+Y+CF. */
op2 = expand_normal (arg1);
- op3 = expand_normal (arg2);
-
- if (!REG_P (op2))
+ if (!register_operand (op2, mode0))
op2 = copy_to_mode_reg (mode0, op2);
- if (!REG_P (op3))
- op3 = copy_to_mode_reg (mode0, op3);
-
- op0 = gen_reg_rtx (mode0);
- op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
- pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
- emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
+ op3 = expand_normal (arg2);
+ if (!register_operand (op3, mode0))
+ op3 = copy_to_mode_reg (mode0, op3);
- /* Store the result. */
op4 = expand_normal (arg3);
if (!address_operand (op4, VOIDmode))
{
op4 = convert_memory_address (Pmode, op4);
op4 = copy_addr_to_reg (op4);
}
- emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
+
+ /* Generate CF from input operand. */
+ emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
+
+ /* Generate instruction that consumes CF. */
+ op0 = gen_reg_rtx (mode0);
+
+ op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
+ pat = gen_rtx_LTU (mode0, op1, const0_rtx);
+ emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
/* Return current CF value. */
if (target == 0)
@@ -39571,6 +39568,10 @@ addcarryx:
PUT_MODE (pat, QImode);
emit_insn (gen_rtx_SET (target, pat));
+
+ /* Store the result. */
+ emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
+
return target;
case IX86_BUILTIN_READ_FLAGS:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6c2d30e507..7017913afe2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -102,7 +102,6 @@
UNSPEC_SAHF
UNSPEC_PARITY
UNSPEC_FSTCW
- UNSPEC_ADD_CARRY
UNSPEC_FLDCW
UNSPEC_REP
UNSPEC_LD_MPIC ; load_macho_picbase
@@ -848,8 +847,6 @@
(define_code_attr plusminus_mnemonic
[(plus "add") (ss_plus "adds") (us_plus "addus")
(minus "sub") (ss_minus "subs") (us_minus "subus")])
-(define_code_attr plusminus_carry_mnemonic
- [(plus "adc") (minus "sbb")])
(define_code_attr multdiv_mnemonic
[(mult "mul") (div "div")])
@@ -5317,46 +5314,21 @@
"ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
"#"
"reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_dup 1) (match_dup 2)]
- UNSPEC_ADD_CARRY))
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DWIH (match_dup 1) (match_dup 2))
+ (match_dup 1)))
(set (match_dup 0)
(plus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(plus:DWIH
- (match_dup 4)
(plus:DWIH
(ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
+ (match_dup 4))
+ (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
-(define_insn "*add<mode>3_cc"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC
- [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
- (plus:SWI48 (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "addqi3_cc"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC
- [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qn,qm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (plus:QI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, QImode, operands)"
- "add{b}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
(define_insn "*add<mode>_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
(plus:SWI48
@@ -6264,10 +6236,10 @@
(minus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(minus:DWIH
- (match_dup 4)
- (plus:DWIH
- (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
+ (minus:DWIH
+ (match_dup 4)
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
+ (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
@@ -6431,29 +6403,17 @@
;; Add with carry and subtract with borrow
-(define_expand "<plusminus_insn><mode>3_carry"
- [(parallel
- [(set (match_operand:SWI 0 "nonimmediate_operand")
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand")
- (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator"
- [(match_operand 3 "flags_reg_operand")
- (const_int 0)])
- (match_operand:SWI 2 "<general_operand>"))))
- (clobber (reg:CC FLAGS_REG))])]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)")
-
-(define_insn "*<plusminus_insn><mode>3_carry"
+(define_insn "add<mode>3_carry"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (plus:SWI
(plus:SWI
- (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))))
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
@@ -6462,10 +6422,11 @@
(define_insn "*addsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (plus:SI (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "x86_64_general_operand" "rme")))))
+ (plus:SI
+ (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %k0|%k0, %2}"
@@ -6474,45 +6435,96 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
+;; There is no point to generate ADCX instruction. ADC is shorter and faster.
+
+(define_insn "addcarry<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm"))
+ (match_dup 1)))
+ (set (match_operand:SWI48 0 "register_operand" "=r")
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sub<mode>3_carry"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)]))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*subsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (minus:SI (match_operand:SI 1 "register_operand" "0")
- (plus:SI (match_operator 3 "ix86_carry_flag_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "x86_64_general_operand" "rme")))))
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operator:SI 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (match_operand:SI 2 "x86_64_general_operand" "rme"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
"sbb{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
-
-;; ADCX instruction
-(define_insn "adcx<mode>3"
+(define_insn "subborrow<mode>"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
+ (match_operand:SWI48 1 "nonimmediate_operand" "0")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0")
- (plus:SWI48
- (match_operator 4 "ix86_carry_flag_operator"
- [(match_operand 3 "flags_reg_operand") (const_int 0)])
- (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
- (const_int 0)))
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm"))))
(set (match_operand:SWI48 0 "register_operand" "=r")
- (plus:SWI48 (match_dup 1)
- (plus:SWI48 (match_op_dup 4
- [(match_dup 3) (const_int 0)])
- (match_dup 2))))]
- "TARGET_ADX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "adcx\t{%2, %0|%0, %2}"
+ (minus:SWI48 (minus:SWI48 (match_dup 1)
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 2)))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
;; Overflow setting add instructions
+(define_expand "addqi3_cconly_overflow"
+ [(parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:QI
+ (match_operand:QI 0 "nonimmediate_operand")
+ (match_operand:QI 1 "general_operand"))
+ (match_dup 0)))
+ (clobber (match_scratch:QI 2))])]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
+
(define_insn "*add<mode>3_cconly_overflow"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
@@ -8842,9 +8854,9 @@
(set (match_dup 0) (neg:DWIH (match_dup 1)))])
(parallel
[(set (match_dup 2)
- (plus:DWIH (match_dup 3)
- (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
- (const_int 0))))
+ (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 2)
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a9c8623ada2..bc76a5b7cee 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -912,25 +912,9 @@
/* Return true if operand is a vector constant that is all ones. */
(define_predicate "vector_all_ones_operand"
- (match_code "const_vector")
-{
- int nunits = GET_MODE_NUNITS (mode);
-
- if (GET_CODE (op) == CONST_VECTOR
- && CONST_VECTOR_NUNITS (op) == nunits)
- {
- int i;
- for (i = 0; i < nunits; ++i)
- {
- rtx x = CONST_VECTOR_ELT (op, i);
- if (x != constm1_rtx)
- return false;
- }
- return true;
- }
-
- return false;
-})
+ (and (match_code "const_vector")
+ (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
+ (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
; Return true when OP is operand acceptable for standard SSE move.
(define_predicate "vector_move_operand"
diff --git a/gcc/config/iq2000/iq2000.md b/gcc/config/iq2000/iq2000.md
index e87cb6802e3..bba67600d96 100644
--- a/gcc/config/iq2000/iq2000.md
+++ b/gcc/config/iq2000/iq2000.md
@@ -1708,7 +1708,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md
index 88d04066d16..02ad3455bd1 100644
--- a/gcc/config/m32c/blkmov.md
+++ b/gcc/config/m32c/blkmov.md
@@ -178,10 +178,10 @@
;; 3 = alignment
(define_expand "cmpstrsi"
- [(match_operand:HI 0 "" "")
- (match_operand 1 "ap_operand" "")
- (match_operand 2 "ap_operand" "")
- (match_operand 3 "" "")
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "memory_operand" "")
+ (match_operand 3 "const_int_operand" "")
]
"TARGET_A24"
"if (m32c_expand_cmpstr(operands)) DONE; FAIL;"
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 7f4195a9f2f..463c8277b43 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -6908,7 +6908,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c
index 40a659a8d7b..6e7745ad137 100644
--- a/gcc/config/microblaze/microblaze.c
+++ b/gcc/config/microblaze/microblaze.c
@@ -661,7 +661,7 @@ microblaze_classify_unspec (struct microblaze_address_info *info, rtx x)
else if (XINT (x, 1) == UNSPEC_TLS)
{
info->type = ADDRESS_TLS;
- info->tls_type = tls_reloc INTVAL(XVECEXP(x, 0, 1));
+ info->tls_type = tls_reloc (INTVAL (XVECEXP (x, 0, 1)));
}
else
{
diff --git a/gcc/config/mips/mips-opts.h b/gcc/config/mips/mips-opts.h
index 79882051595..3c2c6590e3d 100644
--- a/gcc/config/mips/mips-opts.h
+++ b/gcc/config/mips/mips-opts.h
@@ -47,4 +47,10 @@ enum mips_r10k_cache_barrier_setting {
#define MIPS_ARCH_OPTION_FROM_ABI -1
#define MIPS_ARCH_OPTION_NATIVE -2
+/* Enumerates the setting of the -mcompact-branches= option. */
+enum mips_cb_setting {
+ MIPS_CB_NEVER,
+ MIPS_CB_OPTIMAL,
+ MIPS_CB_ALWAYS
+};
#endif
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index d9ad9100f99..8a9ae0147ed 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -298,6 +298,9 @@ extern const char *mips_output_conditional_branch (rtx_insn *, rtx *,
const char *, const char *);
extern const char *mips_output_order_conditional_branch (rtx_insn *, rtx *,
bool);
+extern const char *mips_output_equal_conditional_branch (rtx_insn *, rtx *,
+ bool);
+extern const char *mips_output_jump (rtx *, int, int, bool);
extern const char *mips_output_sync (void);
extern const char *mips_output_sync_loop (rtx_insn *, rtx *);
extern unsigned int mips_sync_loop_insns (rtx_insn *, rtx *);
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 401d73bfeaa..0e0ecf232d9 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -176,7 +176,8 @@ along with GCC; see the file COPYING3. If not see
/* Return the opcode to jump to register DEST. When the JR opcode is not
available use JALR $0, DEST. */
#define MIPS_JR(DEST) \
- (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9))
+ (TARGET_CB_ALWAYS ? ((0x1b << 27) | ((DEST) << 16)) \
+ : (((DEST) << 21) | (ISA_HAS_JR ? 0x8 : 0x9)))
/* Return the opcode for:
@@ -5181,7 +5182,8 @@ mips_allocate_fcc (machine_mode mode)
conditions are:
- EQ or NE between two registers.
- - any comparison between a register and zero. */
+ - any comparison between a register and zero.
+ - if compact branches are available then any condition is valid. */
static void
mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p)
@@ -5203,6 +5205,44 @@ mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p)
else
*op1 = force_reg (GET_MODE (cmp_op0), cmp_op1);
}
+ else if (!need_eq_ne_p && TARGET_CB_MAYBE)
+ {
+ bool swap = false;
+ switch (*code)
+ {
+ case LE:
+ swap = true;
+ *code = GE;
+ break;
+ case GT:
+ swap = true;
+ *code = LT;
+ break;
+ case LEU:
+ swap = true;
+ *code = GEU;
+ break;
+ case GTU:
+ swap = true;
+ *code = LTU;
+ break;
+ case GE:
+ case LT:
+ case GEU:
+ case LTU:
+ /* Do nothing. */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1);
+ if (swap)
+ {
+ rtx tmp = *op1;
+ *op1 = *op0;
+ *op0 = tmp;
+ }
+ }
else
{
/* The comparison needs a separate scc instruction. Store the
@@ -7260,7 +7300,7 @@ mips16_build_call_stub (rtx retval, rtx *fn_ptr, rtx args_size, int fp_code)
if (fp_ret_p)
{
/* Now call the non-MIPS16 function. */
- output_asm_insn (MIPS_CALL ("jal", &fn, 0, -1), &fn);
+ output_asm_insn (mips_output_jump (&fn, 0, -1, true), &fn);
fprintf (asm_out_file, "\t.cfi_register 31,18\n");
/* Move the result from floating-point registers to
@@ -7630,12 +7670,22 @@ mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
half-word alignment, it is usually better to move in half words.
For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr
and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr.
- Otherwise move word-sized chunks. */
- if (MEM_ALIGN (src) == BITS_PER_WORD / 2
- && MEM_ALIGN (dest) == BITS_PER_WORD / 2)
- bits = BITS_PER_WORD / 2;
+ Otherwise move word-sized chunks.
+
+ For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise
+ picking the minimum of alignment or BITS_PER_WORD gets us the
+ desired size for bits. */
+
+ if (!ISA_HAS_LWL_LWR)
+ bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
else
- bits = BITS_PER_WORD;
+ {
+ if (MEM_ALIGN (src) == BITS_PER_WORD / 2
+ && MEM_ALIGN (dest) == BITS_PER_WORD / 2)
+ bits = BITS_PER_WORD / 2;
+ else
+ bits = BITS_PER_WORD;
+ }
mode = mode_for_size (bits, MODE_INT, 0);
delta = bits / BITS_PER_UNIT;
@@ -7754,8 +7804,9 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
bool
mips_expand_block_move (rtx dest, rtx src, rtx length)
{
- /* Disable entirely for R6 initially. */
- if (!ISA_HAS_LWL_LWR)
+ if (!ISA_HAS_LWL_LWR
+ && (MEM_ALIGN (src) < MIPS_MIN_MOVE_MEM_ALIGN
+ || MEM_ALIGN (dest) < MIPS_MIN_MOVE_MEM_ALIGN))
return false;
if (CONST_INT_P (length))
@@ -8367,7 +8418,7 @@ mips_pop_asm_switch (struct mips_asm_switch *asm_switch)
'!' Print "s" to use the short version if the delay slot contains a
16-bit instruction.
- See also mips_init_print_operand_pucnt. */
+ See also mips_init_print_operand_punct. */
static void
mips_print_operand_punctuation (FILE *file, int ch)
@@ -8451,7 +8502,8 @@ mips_print_operand_punctuation (FILE *file, int ch)
case ':':
/* When final_sequence is 0, the delay slot will be a nop. We can
- use the compact version for microMIPS. */
+ use the compact version where available. The %: formatter will
+ only be present if a compact form of the branch is available. */
if (final_sequence == 0)
putc ('c', file);
break;
@@ -8459,8 +8511,9 @@ mips_print_operand_punctuation (FILE *file, int ch)
case '!':
/* If the delay slot instruction is short, then use the
compact version. */
- if (final_sequence == 0
- || get_attr_length (final_sequence->insn (1)) == 2)
+ if (TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED && mips_isa_rev <= 5
+ && (final_sequence == 0
+ || get_attr_length (final_sequence->insn (1)) == 2))
putc ('s', file);
break;
@@ -12958,6 +13011,7 @@ mips_adjust_insn_length (rtx_insn *insn, int length)
break;
case HAZARD_DELAY:
+ case HAZARD_FORBIDDEN_SLOT:
length += NOP_INSN_LENGTH;
break;
@@ -12969,6 +13023,78 @@ mips_adjust_insn_length (rtx_insn *insn, int length)
return length;
}
+/* Return the asm template for a call. OPERANDS are the operands, TARGET_OPNO
+ is the operand number of the target. SIZE_OPNO is the operand number of
+ the argument size operand that can optionally hold the call attributes. If
+ SIZE_OPNO is not -1 and the call is indirect, use the function symbol from
+ the call attributes to attach a R_MIPS_JALR relocation to the call. LINK_P
+ indicates whether the jump is a call and needs to set the link register.
+
+ When generating GOT code without explicit relocation operators, all calls
+ should use assembly macros. Otherwise, all indirect calls should use "jr"
+ or "jalr"; we will arrange to restore $gp afterwards if necessary. Finally,
+ we can only generate direct calls for -mabicalls by temporarily switching
+ to non-PIC mode.
+
+ For microMIPS jal(r), we try to generate jal(r)s when a 16-bit
+ instruction is in the delay slot of jal(r).
+
+ Where compact branches are available, we try to use them if the delay slot
+ has a NOP (or equivalently delay slots were not enabled for the instruction
+ anyway). */
+
+const char *
+mips_output_jump (rtx *operands, int target_opno, int size_opno, bool link_p)
+{
+ static char buffer[300];
+ char *s = buffer;
+ bool reg_p = REG_P (operands[target_opno]);
+
+ const char *and_link = link_p ? "al" : "";
+ const char *reg = reg_p ? "r" : "";
+ const char *compact = "";
+ const char *nop = "%/";
+ const char *short_delay = link_p ? "%!" : "";
+ const char *insn_name = TARGET_CB_NEVER || reg_p ? "j" : "b";
+
+ /* Compact branches can only be described when the ISA has support for them
+ as both the compact formatter '%:' and the delay slot NOP formatter '%/'
+ work as a mutually exclusive pair. I.e. a NOP is never required if a
+ compact form is available. */
+ if (!final_sequence
+ && (TARGET_CB_MAYBE
+ || (ISA_HAS_JRC && !link_p && reg_p)))
+ {
+ compact = "c";
+ nop = "";
+ }
+
+ if (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS)
+ sprintf (s, "%%*%s%s\t%%%d%%/", insn_name, and_link, target_opno);
+ else
+ {
+ if (!reg_p && TARGET_ABICALLS_PIC2)
+ s += sprintf (s, ".option\tpic0\n\t");
+
+ if (reg_p && mips_get_pic_call_symbol (operands, size_opno))
+ {
+ s += sprintf (s, "%%*.reloc\t1f,R_MIPS_JALR,%%%d\n1:\t", size_opno);
+ /* Not sure why this shouldn't permit a short delay but it did not
+ allow it before so we still don't allow it. */
+ short_delay = "";
+ }
+ else
+ s += sprintf (s, "%%*");
+
+ s += sprintf (s, "%s%s%s%s%s\t%%%d%s", insn_name, and_link, reg, compact, short_delay,
+ target_opno, nop);
+
+ if (!reg_p && TARGET_ABICALLS_PIC2)
+ s += sprintf (s, "\n\t.option\tpic2");
+ }
+ return buffer;
+}
+
/* Return the assembly code for INSN, which has the operands given by
OPERANDS, and which branches to OPERANDS[0] if some condition is true.
BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
@@ -13022,12 +13148,25 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
}
/* Output the unconditional branch to TAKEN. */
- if (TARGET_ABSOLUTE_JUMPS)
+ if (TARGET_ABSOLUTE_JUMPS && TARGET_CB_MAYBE)
+ {
+ /* Add a hazard nop. */
+ if (!final_sequence)
+ {
+ output_asm_insn ("nop\t\t# hazard nop", 0);
+ fprintf (asm_out_file, "\n");
+ }
+ output_asm_insn (MIPS_ABSOLUTE_JUMP ("bc\t%0"), &taken);
+ }
+ else if (TARGET_ABSOLUTE_JUMPS)
output_asm_insn (MIPS_ABSOLUTE_JUMP ("j\t%0%/"), &taken);
else
{
mips_output_load_label (taken);
- output_asm_insn ("jr\t%@%]%/", 0);
+ if (TARGET_CB_MAYBE)
+ output_asm_insn ("jrc\t%@%]", 0);
+ else
+ output_asm_insn ("jr\t%@%]%/", 0);
}
/* Now deal with its delay slot; see above. */
@@ -13041,7 +13180,7 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
asm_out_file, optimize, 1, NULL);
final_sequence->insn (1)->set_deleted ();
}
- else
+ else if (TARGET_CB_NEVER)
output_asm_insn ("nop", 0);
fprintf (asm_out_file, "\n");
}
@@ -13053,42 +13192,155 @@ mips_output_conditional_branch (rtx_insn *insn, rtx *operands,
}
/* Return the assembly code for INSN, which branches to OPERANDS[0]
+ if some equality condition is true. The condition is given by
+ OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
+ OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
+ OPERANDS[3] is the second operand and may be zero or a register. */
+
+const char *
+mips_output_equal_conditional_branch (rtx_insn* insn, rtx *operands,
+ bool inverted_p)
+{
+ const char *branch[2];
+ /* For a simple BNEZ or BEQZ microMIPSr3 branch. */
+ if (TARGET_MICROMIPS
+ && mips_isa_rev <= 5
+ && operands[3] == const0_rtx
+ && get_attr_length (insn) <= 8)
+ {
+ if (mips_cb == MIPS_CB_OPTIMAL)
+ {
+ branch[!inverted_p] = "%*b%C1z%:\t%2,%0";
+ branch[inverted_p] = "%*b%N1z%:\t%2,%0";
+ }
+ else
+ {
+ branch[!inverted_p] = "%*b%C1z\t%2,%0%/";
+ branch[inverted_p] = "%*b%N1z\t%2,%0%/";
+ }
+ }
+ else if (TARGET_CB_MAYBE)
+ {
+ if (operands[3] == const0_rtx)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0");
+ }
+ else if (REGNO (operands[2]) != REGNO (operands[3]))
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0");
+ }
+ else
+ {
+ /* This case is degenerate. It should not happen, but does. */
+ if (GET_CODE (operands[1]) == NE)
+ inverted_p = !inverted_p;
+
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ }
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("b%C1", "%2,%z3,%0");
+ branch[inverted_p] = MIPS_BRANCH ("b%N1", "%2,%z3,%0");
+ }
+
+ return mips_output_conditional_branch (insn, operands, branch[1], branch[0]);
+}
+
+/* Return the assembly code for INSN, which branches to OPERANDS[0]
if some ordering condition is true. The condition is given by
OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
- its second is always zero. */
+ OPERANDS[3] is the second operand and may be zero or a register. */
const char *
-mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands, bool inverted_p)
+mips_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
+ bool inverted_p)
{
const char *branch[2];
/* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
Make BRANCH[0] branch on the inverse condition. */
- switch (GET_CODE (operands[1]))
+ if (operands[3] != const0_rtx)
{
- /* These cases are equivalent to comparisons against zero. */
- case LEU:
- inverted_p = !inverted_p;
- /* Fall through. */
- case GTU:
- branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0");
- branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0");
- break;
+ /* Handle degenerate cases that should not, but do, occur. */
+ if (REGNO (operands[2]) == REGNO (operands[3]))
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ case LT:
+ case LTU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GE:
+ case GEU:
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1", "%2,%3,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1", "%2,%3,%0");
+ }
+ }
+ else
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ /* These cases are equivalent to comparisons against zero. */
+ case LEU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GTU:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("bnez", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("beqz", "%2,%0");
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0");
+ }
+ break;
- /* These cases are always true or always false. */
- case LTU:
- inverted_p = !inverted_p;
- /* Fall through. */
- case GEU:
- branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0");
- branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0");
- break;
+ /* These cases are always true or always false. */
+ case LTU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GEU:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b", "%0");
+ branch[inverted_p] = "%*\t\t# branch never";
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0");
+ }
+ break;
- default:
- branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0");
- branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0");
- break;
+ default:
+ if (TARGET_CB_MAYBE)
+ {
+ branch[!inverted_p] = MIPS_BRANCH_C ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH_C ("b%N1z", "%2,%0");
+ }
+ else
+ {
+ branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0");
+ }
+ break;
+ }
}
return mips_output_conditional_branch (insn, operands, branch[1], branch[0]);
}
@@ -13291,11 +13543,18 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands)
at, oldval, inclusive_mask, NULL);
tmp1 = at;
}
- mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
+ if (TARGET_CB_NEVER)
+ mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
/* CMP = 0 [delay slot]. */
if (cmp)
mips_multi_add_insn ("li\t%0,0", cmp, NULL);
+
+ if (TARGET_CB_MAYBE && required_oldval == const0_rtx)
+ mips_multi_add_insn ("bnezc\t%0,2f", tmp1, NULL);
+ else if (TARGET_CB_MAYBE)
+ mips_multi_add_insn ("bnec\t%0,%1,2f", tmp1, required_oldval, NULL);
+
}
/* $TMP1 = OLDVAL & EXCLUSIVE_MASK. */
@@ -13358,7 +13617,10 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands)
be annulled. To ensure this behaviour unconditionally use a NOP
in the delay slot for the branch likely case. */
- mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL);
+ if (TARGET_CB_MAYBE)
+ mips_multi_add_insn ("beqzc\t%0,1b", at, NULL);
+ else
+ mips_multi_add_insn ("beq%?\t%0,%.,1b%~", at, NULL);
/* if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot]. */
if (insn1 != SYNC_INSN1_MOVE && insn1 != SYNC_INSN1_LI && tmp3 != newval)
@@ -16640,7 +16902,7 @@ mips_orphaned_high_part_p (mips_offset_table *htab, rtx_insn *insn)
static void
mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
- rtx *delayed_reg, rtx lo_reg)
+ rtx *delayed_reg, rtx lo_reg, bool *fs_delay)
{
rtx pattern, set;
int nops, ninsns;
@@ -16666,6 +16928,15 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
nops = 2 - *hilo_delay;
else if (*delayed_reg != 0 && reg_referenced_p (*delayed_reg, pattern))
nops = 1;
+ /* If processing a forbidden slot hazard then a NOP is required if the
+ branch instruction was not in a sequence (as the sequence would
+ imply it is not actually a compact branch anyway) and the current
+ insn is not an inline asm, and can't go in a delay slot. */
+ else if (*fs_delay && get_attr_can_delay (insn) == CAN_DELAY_NO
+ && GET_CODE (PATTERN (after)) != SEQUENCE
+ && GET_CODE (pattern) != ASM_INPUT
+ && asm_noperands (pattern) < 0)
+ nops = 1;
else
nops = 0;
@@ -16678,12 +16949,18 @@ mips_avoid_hazard (rtx_insn *after, rtx_insn *insn, int *hilo_delay,
/* Set up the state for the next instruction. */
*hilo_delay += ninsns;
*delayed_reg = 0;
+ *fs_delay = false;
if (INSN_CODE (insn) >= 0)
switch (get_attr_hazard (insn))
{
case HAZARD_NONE:
break;
+ case HAZARD_FORBIDDEN_SLOT:
+ if (TARGET_CB_MAYBE)
+ *fs_delay = true;
+ break;
+
case HAZARD_HILO:
*hilo_delay = 0;
break;
@@ -16707,6 +16984,7 @@ mips_reorg_process_insns (void)
rtx_insn *insn, *last_insn, *subinsn, *next_insn;
rtx lo_reg, delayed_reg;
int hilo_delay;
+ bool fs_delay;
/* Force all instructions to be split into their final form. */
split_all_insns_noflow ();
@@ -16775,6 +17053,7 @@ mips_reorg_process_insns (void)
hilo_delay = 2;
delayed_reg = 0;
lo_reg = gen_rtx_REG (SImode, LO_REGNUM);
+ fs_delay = false;
/* Make a second pass over the instructions. Delete orphaned
high-part relocations or turn them into NOPs. Avoid hazards
@@ -16798,7 +17077,7 @@ mips_reorg_process_insns (void)
INSN_CODE (subinsn) = CODE_FOR_nop;
}
mips_avoid_hazard (last_insn, subinsn, &hilo_delay,
- &delayed_reg, lo_reg);
+ &delayed_reg, lo_reg, &fs_delay);
}
last_insn = insn;
}
@@ -16819,7 +17098,7 @@ mips_reorg_process_insns (void)
else
{
mips_avoid_hazard (last_insn, insn, &hilo_delay,
- &delayed_reg, lo_reg);
+ &delayed_reg, lo_reg, &fs_delay);
last_insn = insn;
}
}
@@ -17684,6 +17963,27 @@ mips_option_override (void)
target_flags |= MASK_ODD_SPREG;
}
+ if (!ISA_HAS_COMPACT_BRANCHES && mips_cb == MIPS_CB_ALWAYS)
+ {
+ error ("unsupported combination: %qs%s %s",
+ mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
+ "-mcompact-branches=always");
+ }
+ else if (!ISA_HAS_DELAY_SLOTS && mips_cb == MIPS_CB_NEVER)
+ {
+ error ("unsupported combination: %qs%s %s",
+ mips_arch_info->name, TARGET_MICROMIPS ? " -mmicromips" : "",
+ "-mcompact-branches=never");
+ }
+
+ /* Require explicit relocs for MIPS R6 onwards. This enables simplification
+ of the compact branch and jump support through the backend. */
+ if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
+ {
+ error ("unsupported combination: %qs %s",
+ mips_arch_info->name, "-mno-explicit-relocs");
+ }
+
/* The effect of -mabicalls isn't defined for the EABI. */
if (mips_abi == ABI_EABI && TARGET_ABICALLS)
{
@@ -18703,6 +19003,18 @@ mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
#undef OP
+ /* If we are using compact branches we don't have delay slots so
+ place the instruction that was in the delay slot before the JRC
+ instruction. */
+
+ if (TARGET_CB_ALWAYS)
+ {
+ rtx temp;
+ temp = trampoline[i-2];
+ trampoline[i-2] = trampoline[i-1];
+ trampoline[i-1] = temp;
+ }
+
/* Copy the trampoline code. Leave any padding uninitialized. */
for (j = 0; j < i; j++)
{
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index da1de011fc9..25a1e0622cd 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -92,6 +92,33 @@ struct mips_cpu_info {
/* True if we are generating position-independent VxWorks RTP code. */
#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic)
+/* Compact branches must not be used if the user either selects the
+ 'never' policy or the 'optimal' policy on a core that lacks
+ compact branch instructions. */
+#define TARGET_CB_NEVER (mips_cb == MIPS_CB_NEVER \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && !ISA_HAS_COMPACT_BRANCHES))
+
+/* Compact branches may be used if the user either selects the
+ 'always' policy or the 'optimal' policy on a core that supports
+ compact branch instructions. */
+#define TARGET_CB_MAYBE (TARGET_CB_ALWAYS \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && ISA_HAS_COMPACT_BRANCHES))
+
+/* Compact branches must always be generated if the user selects
+ the 'always' policy or the 'optimal' policy on a core that
+ lacks delay slot branch instructions. */
+#define TARGET_CB_ALWAYS (mips_cb == MIPS_CB_ALWAYS \
+ || (mips_cb == MIPS_CB_OPTIMAL \
+ && !ISA_HAS_DELAY_SLOTS))
+
+/* Special handling for JRC that exists in microMIPSR3 as well as R6
+ ISAs with full compact branch support. */
+#define ISA_HAS_JRC ((ISA_HAS_COMPACT_BRANCHES \
+ || TARGET_MICROMIPS) \
+ && mips_cb != MIPS_CB_NEVER)
+
/* True if the output file is marked as ".abicalls; .option pic0"
(-call_nonpic). */
#define TARGET_ABICALLS_PIC0 \
@@ -872,6 +899,10 @@ struct mips_cpu_info {
#define ISA_HAS_JR (mips_isa_rev <= 5)
+#define ISA_HAS_DELAY_SLOTS 1
+
+#define ISA_HAS_COMPACT_BRANCHES (mips_isa_rev >= 6)
+
/* ISA has branch likely instructions (e.g. mips2). */
/* Disable branchlikely for tx39 until compare rewrite. They haven't
been generated up to this point. */
@@ -2645,6 +2676,9 @@ typedef struct mips_args {
#define MIPS_BRANCH(OPCODE, OPERANDS) \
"%*" OPCODE "%?\t" OPERANDS "%/"
+#define MIPS_BRANCH_C(OPCODE, OPERANDS) \
+ "%*" OPCODE "%:\t" OPERANDS
+
/* Return an asm string that forces INSN to be treated as an absolute
J or JAL instruction instead of an assembler macro. */
#define MIPS_ABSOLUTE_JUMP(INSN) \
@@ -2652,45 +2686,6 @@ typedef struct mips_args {
? ".option\tpic0\n\t" INSN "\n\t.option\tpic2" \
: INSN)
-/* Return the asm template for a call. INSN is the instruction's mnemonic
- ("j" or "jal"), OPERANDS are its operands, TARGET_OPNO is the operand
- number of the target. SIZE_OPNO is the operand number of the argument size
- operand that can optionally hold the call attributes. If SIZE_OPNO is not
- -1 and the call is indirect, use the function symbol from the call
- attributes to attach a R_MIPS_JALR relocation to the call.
-
- When generating GOT code without explicit relocation operators,
- all calls should use assembly macros. Otherwise, all indirect
- calls should use "jr" or "jalr"; we will arrange to restore $gp
- afterwards if necessary. Finally, we can only generate direct
- calls for -mabicalls by temporarily switching to non-PIC mode.
-
- For microMIPS jal(r), we try to generate jal(r)s when a 16-bit
- instruction is in the delay slot of jal(r). */
-#define MIPS_CALL(INSN, OPERANDS, TARGET_OPNO, SIZE_OPNO) \
- (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \
- ? "%*" INSN "\t%" #TARGET_OPNO "%/" \
- : REG_P (OPERANDS[TARGET_OPNO]) \
- ? (mips_get_pic_call_symbol (OPERANDS, SIZE_OPNO) \
- ? ("%*.reloc\t1f,R_MIPS_JALR,%" #SIZE_OPNO "\n" \
- "1:\t" INSN "r\t%" #TARGET_OPNO "%/") \
- : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \
- ? "%*" INSN "r%!\t%" #TARGET_OPNO "%/" \
- : "%*" INSN "r\t%" #TARGET_OPNO "%/") \
- : TARGET_MICROMIPS && !TARGET_INTERLINK_COMPRESSED \
- ? MIPS_ABSOLUTE_JUMP ("%*" INSN "%!\t%" #TARGET_OPNO "%/") \
- : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #TARGET_OPNO "%/")) \
-
-/* Similar to MIPS_CALL, but this is for MICROMIPS "j" to generate
- "jrc" when nop is in the delay slot of "jr". */
-
-#define MICROMIPS_J(INSN, OPERANDS, OPNO) \
- (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \
- ? "%*j\t%" #OPNO "%/" \
- : REG_P (OPERANDS[OPNO]) \
- ? "%*jr%:\t%" #OPNO \
- : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #OPNO "%/"))
-
/* Control the assembler format that we output. */
@@ -2981,6 +2976,9 @@ while (0)
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+/* The minimum alignment of any expanded block move. */
+#define MIPS_MIN_MOVE_MEM_ALIGN 16
+
/* The maximum number of bytes that can be copied by one iteration of
a movmemsi loop; see mips_block_move_loop. */
#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index a0079d5c974..1d1c42bf5f5 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -409,6 +409,15 @@
(eq_attr "sync_mem" "!none") (const_string "syncloop")]
(const_string "unknown")))
+(define_attr "compact_form" "always,maybe,never"
+ (cond [(eq_attr "jal" "direct")
+ (const_string "always")
+ (eq_attr "jal" "indirect")
+ (const_string "maybe")
+ (eq_attr "type" "jump")
+ (const_string "maybe")]
+ (const_string "never")))
+
;; Mode for conversion types (fcvt)
;; I2S integer to float single (SI/DI to SF)
;; I2D integer to float double (SI/DI to DF)
@@ -694,7 +703,7 @@
;; DELAY means that the next instruction cannot read the result
;; of this one. HILO means that the next two instructions cannot
;; write to HI or LO.
-(define_attr "hazard" "none,delay,hilo"
+(define_attr "hazard" "none,delay,hilo,forbidden_slot"
(cond [(and (eq_attr "type" "load,fpload,fpidxload")
(match_test "ISA_HAS_LOAD_DELAY"))
(const_string "delay")
@@ -1045,21 +1054,37 @@
(nil)
(eq_attr "can_delay" "yes")])
-;; Branches that don't have likely variants do not annul on false.
+;; Branches that have delay slots and don't have likely variants do
+;; not annul on false.
(define_delay (and (eq_attr "type" "branch")
(not (match_test "TARGET_MIPS16"))
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never"))
(eq_attr "branch_likely" "no"))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
-(define_delay (eq_attr "type" "jump")
+(define_delay (and (eq_attr "type" "jump")
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never")))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
+;; Call type instructions should never have a compact form as the
+;; type is only used for MIPS16 patterns. For safety put the compact
+;; branch detection condition in anyway.
(define_delay (and (eq_attr "type" "call")
- (eq_attr "jal_macro" "no"))
+ (eq_attr "jal_macro" "no")
+ (ior (match_test "TARGET_CB_NEVER")
+ (and (eq_attr "compact_form" "maybe")
+ (not (match_test "TARGET_CB_ALWAYS")))
+ (eq_attr "compact_form" "never")))
[(eq_attr "can_delay" "yes")
(nil)
(nil)])
@@ -5813,25 +5838,29 @@
[(set (pc)
(if_then_else
(match_operator 1 "order_operator"
- [(match_operand:GPR 2 "register_operand" "d")
- (const_int 0)])
+ [(match_operand:GPR 2 "register_operand" "d,d")
+ (match_operand:GPR 3 "reg_or_0_operand" "J,d")])
(label_ref (match_operand 0 "" ""))
(pc)))]
"!TARGET_MIPS16"
{ return mips_output_order_conditional_branch (insn, operands, false); }
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe,always")
+ (set_attr "hazard" "forbidden_slot")])
(define_insn "*branch_order<mode>_inverted"
[(set (pc)
(if_then_else
(match_operator 1 "order_operator"
- [(match_operand:GPR 2 "register_operand" "d")
- (const_int 0)])
+ [(match_operand:GPR 2 "register_operand" "d,d")
+ (match_operand:GPR 3 "reg_or_0_operand" "J,d")])
(pc)
(label_ref (match_operand 0 "" ""))))]
"!TARGET_MIPS16"
{ return mips_output_order_conditional_branch (insn, operands, true); }
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe,always")
+ (set_attr "hazard" "forbidden_slot")])
;; Conditional branch on equality comparison.
@@ -5844,20 +5873,10 @@
(label_ref (match_operand 0 "" ""))
(pc)))]
"!TARGET_MIPS16"
-{
- /* For a simple BNEZ or BEQZ microMIPS branch. */
- if (TARGET_MICROMIPS
- && operands[3] == const0_rtx
- && get_attr_length (insn) <= 8)
- return mips_output_conditional_branch (insn, operands,
- "%*b%C1z%:\t%2,%0",
- "%*b%N1z%:\t%2,%0");
-
- return mips_output_conditional_branch (insn, operands,
- MIPS_BRANCH ("b%C1", "%2,%z3,%0"),
- MIPS_BRANCH ("b%N1", "%2,%z3,%0"));
-}
- [(set_attr "type" "branch")])
+ { return mips_output_equal_conditional_branch (insn, operands, false); }
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")
+ (set_attr "hazard" "forbidden_slot")])
(define_insn "*branch_equality<mode>_inverted"
[(set (pc)
@@ -5868,20 +5887,10 @@
(pc)
(label_ref (match_operand 0 "" ""))))]
"!TARGET_MIPS16"
-{
- /* For a simple BNEZ or BEQZ microMIPS branch. */
- if (TARGET_MICROMIPS
- && operands[3] == const0_rtx
- && get_attr_length (insn) <= 8)
- return mips_output_conditional_branch (insn, operands,
- "%*b%N0z%:\t%2,%1",
- "%*b%C0z%:\t%2,%1");
-
- return mips_output_conditional_branch (insn, operands,
- MIPS_BRANCH ("b%N1", "%2,%z3,%0"),
- MIPS_BRANCH ("b%C1", "%2,%z3,%0"));
-}
- [(set_attr "type" "branch")])
+ { return mips_output_equal_conditional_branch (insn, operands, true); }
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")
+ (set_attr "hazard" "forbidden_slot")])
;; MIPS16 branches
@@ -6176,11 +6185,22 @@
"!TARGET_MIPS16 && TARGET_ABSOLUTE_JUMPS"
{
if (get_attr_length (insn) <= 8)
- return "%*b\t%l0%/";
+ {
+ if (TARGET_CB_MAYBE)
+ return MIPS_ABSOLUTE_JUMP ("%*b%:\t%l0");
+ else
+ return MIPS_ABSOLUTE_JUMP ("%*b\t%l0%/");
+ }
else
- return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/");
+ {
+ if (TARGET_CB_MAYBE && !final_sequence)
+ return MIPS_ABSOLUTE_JUMP ("%*bc\t%l0");
+ else
+ return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/");
+ }
}
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")])
(define_insn "*jump_pic"
[(set (pc)
@@ -6188,14 +6208,23 @@
"!TARGET_MIPS16 && !TARGET_ABSOLUTE_JUMPS"
{
if (get_attr_length (insn) <= 8)
- return "%*b\t%l0%/";
+ {
+ if (TARGET_CB_MAYBE)
+ return "%*b%:\t%l0";
+ else
+ return "%*b\t%l0%/";
+ }
else
{
mips_output_load_label (operands[0]);
- return "%*jr\t%@%/%]";
+ if (TARGET_CB_MAYBE)
+ return "%*jr%:\t%@%]";
+ else
+ return "%*jr\t%@%/%]";
}
}
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set_attr "compact_form" "maybe")])
;; We need a different insn for the mips16, because a mips16 branch
;; does not have a delay slot.
@@ -6242,12 +6271,9 @@
(define_insn "indirect_jump_<mode>"
[(set (pc) (match_operand:P 0 "register_operand" "d"))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6291,12 +6317,9 @@
(match_operand:P 0 "register_operand" "d"))
(use (label_ref (match_operand 1 "" "")))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6508,10 +6531,8 @@
[(any_return)]
""
{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t$31";
- else
- return "%*j\t$31%/";
+ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ return mips_output_jump (operands, 0, -1, false);
}
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6522,12 +6543,10 @@
[(any_return)
(use (match_operand 0 "pmode_register_operand" ""))]
""
-{
- if (TARGET_MICROMIPS)
- return "%*jr%:\t%0";
- else
- return "%*j\t%0%/";
-}
+ {
+ operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ return mips_output_jump (operands, 0, -1, false);
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
@@ -6783,12 +6802,7 @@
[(call (mem:SI (match_operand 0 "call_insn_operand" "j,S"))
(match_operand 1 "" ""))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 0);
- else
- return MIPS_CALL ("j", operands, 0, 1);
-}
+ { return mips_output_jump (operands, 0, 1, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6809,12 +6823,7 @@
(call (mem:SI (match_operand 1 "call_insn_operand" "j,S"))
(match_operand 2 "" "")))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 1);
- else
- return MIPS_CALL ("j", operands, 1, 2);
-}
+ { return mips_output_jump (operands, 1, 2, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6826,12 +6835,7 @@
(call (mem:SI (match_dup 1))
(match_dup 2)))]
"TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
-{
- if (TARGET_MICROMIPS)
- return MICROMIPS_J ("j", operands, 1);
- else
- return MIPS_CALL ("j", operands, 1, 2);
-}
+ { return mips_output_jump (operands, 1, 2, false); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6887,7 +6891,10 @@
(match_operand 1 "" ""))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, 1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 0, 1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6902,7 +6909,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 0, 1); }
+ { return mips_output_jump (operands, 0, 1, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6916,7 +6923,10 @@
(const_int 1)
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, -1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 0, -1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6933,7 +6943,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "jal" "direct")
(set_attr "jal_macro" "no")])
@@ -6956,7 +6966,10 @@
(match_operand 2 "" "")))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, 2, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -6974,7 +6987,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, 2); }
+ { return mips_output_jump (operands, 1, 2, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -6986,7 +6999,10 @@
(const_int 1)
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, -1); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, -1, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -7005,7 +7021,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, -1); }
+ { return mips_output_jump (operands, 1, -1, true); }
[(set_attr "jal" "direct")
(set_attr "jal_macro" "no")])
@@ -7019,7 +7035,10 @@
(match_dup 2)))
(clobber (reg:SI RETURN_ADDR_REGNUM))]
""
- { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ {
+ return (TARGET_SPLIT_CALLS ? "#"
+ : mips_output_jump (operands, 1, 2, true));
+ }
"reload_completed && TARGET_SPLIT_CALLS"
[(const_int 0)]
{
@@ -7040,7 +7059,7 @@
(clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
- { return MIPS_CALL ("jal", operands, 1, 2); }
+ { return mips_output_jump (operands, 1, 2, true); }
[(set_attr "jal" "indirect,direct")
(set_attr "jal_macro" "no")])
@@ -7055,7 +7074,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -7411,7 +7430,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"HAVE_AS_TLS && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")
(set_attr "mode" "<MODE>")])
@@ -7452,7 +7471,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"TARGET_HARD_FLOAT_ABI && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")])
@@ -7482,7 +7501,7 @@
(clobber (reg:P PIC_FUNCTION_ADDR_REGNUM))
(clobber (reg:P RETURN_ADDR_REGNUM))]
"TARGET_HARD_FLOAT_ABI && TARGET_MIPS16"
- { return MIPS_CALL ("jal", operands, 0, -1); }
+ { return mips_output_jump (operands, 0, -1, true); }
[(set_attr "type" "call")
(set_attr "insn_count" "3")])
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 348c6e03f1e..84887d11623 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -418,3 +418,20 @@ Driver
mload-store-pairs
Target Report Var(TARGET_LOAD_STORE_PAIRS) Init(1)
Enable load/store bonding.
+
+mcompact-branches=
+Target RejectNegative JoinedOrMissing Var(mips_cb) Report Enum(mips_cb_setting) Init(MIPS_CB_OPTIMAL)
+Specify the compact branch usage policy
+
+Enum
+Name(mips_cb_setting) Type(enum mips_cb_setting)
+Policies available for use with -mcompact-branches=:
+
+EnumValue
+Enum(mips_cb_setting) String(never) Value(MIPS_CB_NEVER)
+
+EnumValue
+Enum(mips_cb_setting) String(optimal) Value(MIPS_CB_OPTIMAL)
+
+EnumValue
+Enum(mips_cb_setting) String(always) Value(MIPS_CB_ALWAYS)
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index 4929c3dc27e..3259232bb89 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -475,7 +475,18 @@
(match_code "eq,ne,lt,ltu,ge,geu"))
(define_predicate "order_operator"
- (match_code "lt,ltu,le,leu,ge,geu,gt,gtu"))
+ (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")
+{
+ if (XEXP (op, 1) == const0_rtx)
+ return true;
+
+ if (TARGET_CB_MAYBE
+ && (GET_CODE (op) == LT || GET_CODE (op) == LTU
+ || GET_CODE (op) == GE || GET_CODE (op) == GEU))
+ return true;
+
+ return false;
+})
;; For NE, cstore uses sltu instructions in which the first operand is $0.
;; This isn't possible in mips16 code.
diff --git a/gcc/config/msp430/msp430.opt b/gcc/config/msp430/msp430.opt
index 3fed8799822..e055f61069d 100644
--- a/gcc/config/msp430/msp430.opt
+++ b/gcc/config/msp430/msp430.opt
@@ -12,7 +12,7 @@ Specify the MCU to build for.
mcpu=
Target Report Joined RejectNegative Var(target_cpu)
-Specify the ISA to build for: msp430, mdsp430x, msp430xv2
+Specify the ISA to build for: msp430, msp430x, msp430xv2
mlarge
Target Report Mask(LARGE) RejectNegative
diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c
index 1e154c8412c..ba0454e537a 100644
--- a/gcc/config/nvptx/mkoffload.c
+++ b/gcc/config/nvptx/mkoffload.c
@@ -881,10 +881,10 @@ process (FILE *in, FILE *out)
"extern \"C\" {\n"
"#endif\n");
- fprintf (out, "extern void GOMP_offload_register"
- " (const void *, int, const void *);\n");
- fprintf (out, "extern void GOMP_offload_unregister"
- " (const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_register_ver"
+ " (unsigned, const void *, int, const void *);\n");
+ fprintf (out, "extern void GOMP_offload_unregister_ver"
+ " (unsigned, const void *, int, const void *);\n");
fprintf (out, "#ifdef __cplusplus\n"
"}\n"
@@ -894,15 +894,19 @@ process (FILE *in, FILE *out)
fprintf (out, "static __attribute__((constructor)) void init (void)\n"
"{\n"
- " GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
"{\n"
- " GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
- " &target_data);\n"
- "};\n", GOMP_DEVICE_NVIDIA_PTX);
+ " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
+ "%d/*NVIDIA_PTX*/, &target_data);\n"
+ "};\n",
+ GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_NVIDIA_PTX),
+ GOMP_DEVICE_NVIDIA_PTX);
}
static void
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index a3569670d62..e6853680078 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1,4 +1,3 @@
-
/* Target code for NVPTX.
Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Bernd Schmidt <bernds@codesourcery.com>
@@ -322,7 +321,8 @@ nvptx_write_function_decl (std::stringstream &s, const char *name, const_tree de
/* Declare argument types. */
if ((args != NULL_TREE
- && !(TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) == void_type_node))
+ && !(TREE_CODE (args) == TREE_LIST
+ && TREE_VALUE (args) == void_type_node))
|| is_main
|| return_in_mem
|| DECL_STATIC_CHAIN (decl))
@@ -406,8 +406,8 @@ walk_args_for_param (FILE *file, tree argtypes, tree args, bool write_copy,
mode = DFmode;
}
- mode = arg_promotion (mode);
}
+ mode = arg_promotion (mode);
while (count-- > 0)
{
i++;
@@ -546,7 +546,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
else if (TYPE_MODE (result_type) != VOIDmode)
{
machine_mode mode = arg_promotion (TYPE_MODE (result_type));
- fprintf (file, ".reg%s %%retval;\n",
+ fprintf (file, "\t.reg%s %%retval;\n",
nvptx_ptx_type_from_mode (mode, false));
}
@@ -598,9 +598,11 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
sz = get_frame_size ();
if (sz > 0 || cfun->machine->has_call_with_sc)
{
+ int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
+
fprintf (file, "\t.reg.u%d %%frame;\n"
- "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
- BITS_PER_WORD, sz == 0 ? 1 : sz);
+ "\t.local.align %d .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC"];\n",
+ BITS_PER_WORD, alignment, sz == 0 ? 1 : sz);
fprintf (file, "\tcvta.local.u%d %%frame, %%farray;\n",
BITS_PER_WORD);
}
@@ -616,10 +618,10 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
walk_args_for_param (file, TYPE_ARG_TYPES (fntype), DECL_ARGUMENTS (decl),
true, return_in_mem);
if (return_in_mem)
- fprintf (file, "ld.param.u%d %%ar1, [%%in_ar1];\n",
+ fprintf (file, "\tld.param.u%d %%ar1, [%%in_ar1];\n",
GET_MODE_BITSIZE (Pmode));
if (stdarg_p (fntype))
- fprintf (file, "ld.param.u%d %%argp, [%%in_argp];\n",
+ fprintf (file, "\tld.param.u%d %%argp, [%%in_argp];\n",
GET_MODE_BITSIZE (Pmode));
}
@@ -726,6 +728,14 @@ nvptx_function_ok_for_sibcall (tree, tree)
return false;
}
+/* Return Dynamic ReAlignment Pointer RTX. For PTX there isn't any. */
+
+static rtx
+nvptx_get_drap_rtx (void)
+{
+ return NULL_RTX;
+}
+
/* Implement the TARGET_CALL_ARGS hook. Record information about one
argument to the next call. */
@@ -1908,7 +1918,7 @@ nvptx_reorg_subreg (void)
{
next = NEXT_INSN (insn);
if (!NONDEBUG_INSN_P (insn)
- || asm_noperands (insn) >= 0
+ || asm_noperands (PATTERN (insn)) >= 0
|| GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)
continue;
@@ -2118,6 +2128,8 @@ nvptx_file_end (void)
#define TARGET_LIBCALL_VALUE nvptx_libcall_value
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
+#undef TARGET_GET_DRAP_RTX
+#define TARGET_GET_DRAP_RTX nvptx_get_drap_rtx
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_RETURN_IN_MEMORY
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index afe4fcdd361..60a922af93f 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -52,6 +52,8 @@
#define BIGGEST_ALIGNMENT 64
#define STRICT_ALIGNMENT 1
+#define MAX_STACK_ALIGNMENT (1024 * 8)
+
/* Copied from elf.h and other places. We'd otherwise use
BIGGEST_ALIGNMENT and fail a number of testcases. */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 7c2cb9c15bc..b857e53bb22 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1241,6 +1241,12 @@
(match_operand 1 "nvptx_register_operand")]
""
{
+ /* The ptx documentation specifies an alloca intrinsic (for 32 bit
+ only) but notes it is not implemented. The assembler emits a
+ confused error message. Issue a blunt one now instead. */
+ sorry ("target cannot support alloca");
+ emit_insn (gen_nop ());
+ DONE;
if (TARGET_ABI64)
emit_insn (gen_allocate_stack_di (operands[0], operands[1]));
else
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index ad9289236ff..46fc0f5719c 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -8248,7 +8248,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 3ef6bc85ecd..1c00099c78d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -208,6 +208,8 @@
#define vec_lvebx __builtin_vec_lvebx
#define vec_lvehx __builtin_vec_lvehx
#define vec_lvewx __builtin_vec_lvewx
+#define vec_pmsum_be __builtin_vec_vpmsum
+#define vec_shasigma_be __builtin_crypto_vshasigma
/* Cell only intrinsics. */
#ifdef __PPU__
#define vec_lvlx __builtin_vec_lvlx
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ae74796849d..3edb4774e75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -239,6 +239,25 @@
return INT_REGNO_P (REGNO (op));
})
+;; Like int_reg_operand, but don't return true for pseudo registers
+(define_predicate "int_reg_operand_not_pseudo"
+ (match_operand 0 "register_operand")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ return 0;
+
+ return INT_REGNO_P (REGNO (op));
+})
+
;; Like int_reg_operand, but only return true for base registers
(define_predicate "base_reg_operand"
(match_operand 0 "int_reg_operand")
@@ -883,12 +902,12 @@
(define_predicate "current_file_function_operand"
(and (match_code "symbol_ref")
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
- && ((SYMBOL_REF_LOCAL_P (op)
- && ((DEFAULT_ABI != ABI_AIX
- && DEFAULT_ABI != ABI_ELFv2)
- || !SYMBOL_REF_EXTERNAL_P (op)))
- || (op == XEXP (DECL_RTL (current_function_decl),
- 0)))")))
+ && (SYMBOL_REF_LOCAL_P (op)
+ || op == XEXP (DECL_RTL (current_function_decl), 0))
+ && !((DEFAULT_ABI == ABI_AIX
+ || DEFAULT_ABI == ABI_ELFv2)
+ && (SYMBOL_REF_EXTERNAL_P (op)
+ || SYMBOL_REF_WEAK (op)))")))
;; Return 1 if this operand is a valid input for a move insn.
(define_predicate "input_operand"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 7beddf64d1b..85082ec0ee2 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus)
+BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
+BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
+BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
+BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
@@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
+BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum")
BU_P8V_OVERLOAD_2 (VRLD, "vrld")
BU_P8V_OVERLOAD_2 (VSLD, "vsld")
BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d45bc93b10a..5fc2b53adfe 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
{ ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
{ ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
@@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 9fd565286f2..03764aef740 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -53,6 +53,7 @@
| OPTION_MASK_P8_VECTOR \
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_HTM \
| OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_QUAD_MEMORY_ATOMIC \
@@ -78,6 +79,7 @@
| OPTION_MASK_DFP \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_DLMZB \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_FPRND \
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 7262a151438..7be529fab49 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -54,6 +54,7 @@ extern const char *output_vec_const_move (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
extern bool rs6000_move_128bit_ok_p (rtx []);
extern bool rs6000_split_128bit_ok_p (rtx []);
+extern void rs6000_expand_float128_convert (rtx, rtx, bool);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2a969782f26..8107bec8e6e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
error ("-mfloat128-software requires VSX support");
+ /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
+ support. If we only have ISA 2.06 support, and the user did not specify
+ the switch, leave it set to -1 so the movmisalign patterns are enabled,
+ but we don't enable the full vectorization support */
+ if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
+ TARGET_ALLOW_MOVMISALIGN = 1;
+
+ else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
+ {
+ if (TARGET_ALLOW_MOVMISALIGN > 0)
+ error ("-mallow-movmisalign requires -mvsx");
+
+ TARGET_ALLOW_MOVMISALIGN = 0;
+ }
+
+ /* Determine when unaligned vector accesses are permitted, and when
+ they are preferred over masked Altivec loads. Note that if
+ TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
+ TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
+ not true. */
+ if (TARGET_EFFICIENT_UNALIGNED_VSX)
+ {
+ if (!TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mvsx");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+
+ else if (!TARGET_ALLOW_MOVMISALIGN)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p)
}
}
- /* Determine when unaligned vector accesses are permitted, and when
- they are preferred over masked Altivec loads. Note that if
- TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
- TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
- not true. */
- if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
- if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
- && TARGET_ALLOW_MOVMISALIGN != 0)
- TARGET_EFFICIENT_UNALIGNED_VSX = 1;
- else
- TARGET_EFFICIENT_UNALIGNED_VSX = 0;
- }
-
- if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
- TARGET_ALLOW_MOVMISALIGN = 1;
-
/* Set the builtin mask of the various options used that could affect which
builtins were used. In the past we used target_flags, but we've run out
of bits, and some options like SPE and PAIRED are no longer in
@@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
during expand. */
gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
- /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
{
@@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
{
unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+ bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
+ bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
/* Don't allow 64-bit types to overlap with 128-bit types that take a
single register under VSX because the scalar part of the register
@@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from,
IEEE floating point can't overlap, and neither can small
values. */
- if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ if (to_float128_vector_p && from_float128_vector_p)
+ return false;
+
+ else if (to_float128_vector_p || from_float128_vector_p)
return true;
/* TDmode in floating-mode registers must always go into a register
@@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
if (TARGET_E500_DOUBLE
&& ((((to) == DFmode) + ((from) == DFmode)) == 1
|| (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == IFmode) + ((from) == IFmode)) == 1
+ || (((to) == KFmode) + ((from) == KFmode)) == 1
|| (((to) == DDmode) + ((from) == DDmode)) == 1
|| (((to) == TDmode) + ((from) == TDmode)) == 1
|| (((to) == DImode) + ((from) == DImode)) == 1))
@@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[])
return output_vec_const_move (operands);
}
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\n===== Bad 128 bit move:\n");
- debug_rtx (gen_rtx_SET (dest, src));
- }
-
- gcc_unreachable ();
+ fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move. */
@@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result, op0, op1)
: gen_cmptfeq_gpr (compare_result, op0, op1);
@@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfgt_gpr (compare_result, op0, op1)
: gen_cmptfgt_gpr (compare_result, op0, op1);
@@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttflt_gpr (compare_result, op0, op1)
: gen_cmptflt_gpr (compare_result, op0, op1);
@@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result2, op0, op1)
: gen_cmptfeq_gpr (compare_result2, op0, op1);
@@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
emit_insn (cmp);
}
+
+ /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2)
+ returns 0..15 that is laid out the same way as the PowerPC CR register
+ would for a normal floating point comparison. */
+ else if (FLOAT128_IEEE_P (mode))
+ {
+ rtx and_reg = gen_reg_rtx (SImode);
+ rtx dest = gen_reg_rtx (SImode);
+ rtx libfunc = optab_libfunc (cmp_optab, mode);
+ HOST_WIDE_INT mask_value = 0;
+
+ /* Values that __cmpkf2 returns. */
+#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
+#define PPC_CMP_EQUAL 0x2 /* a == b. */
+#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
+#define PPC_CMP_LESS_THEN 0x8 /* a < b. */
+
+ switch (code)
+ {
+ case EQ:
+ mask_value = PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case NE:
+ mask_value = PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case GT:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = NE;
+ break;
+
+ case GE:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case LT:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = NE;
+ break;
+
+ case LE:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case UNLE:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = EQ;
+ break;
+
+ case UNLT:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNGE:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = EQ;
+ break;
+
+ case UNGT:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNEQ:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case LTGT:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ case UNORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case ORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (mask_value != 0);
+ and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
+ op0, mode, op1, mode);
+
+ emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
+ compare_result = gen_reg_rtx (CCmode);
+ comp_mode = CCmode;
+
+ emit_insn (gen_rtx_SET (compare_result,
+ gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
+ }
+
else
{
/* Generate XLC-compatible TFmode compare as PARALLEL with extra
CLOBBERs to match cmptf_internal2 pattern. */
if (comp_mode == CCFPmode && TARGET_XL_COMPAT
- && GET_MODE (op0) == TFmode
- && !TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+ && FLOAT128_IBM_P (GET_MODE (op0))
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
emit_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (10,
gen_rtx_SET (compare_result,
@@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
/* Some kinds of FP comparisons need an OR operation;
under flag_finite_math_only we don't bother. */
if (FLOAT_MODE_P (mode)
+ && !FLOAT128_IEEE_P (mode)
&& !flag_finite_math_only
&& !(TARGET_HARD_FLOAT && !TARGET_FPRS)
&& (code == LE || code == GE
@@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
}
+/* Expand floating point conversion to/from __float128 and __ibm128. */
+
+void
+rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
+{
+ machine_mode dest_mode = GET_MODE (dest);
+ machine_mode src_mode = GET_MODE (src);
+ convert_optab cvt = unknown_optab;
+ rtx libfunc = NULL_RTX;
+ rtx dest2;
+
+ if (dest_mode == src_mode)
+ gcc_unreachable ();
+
+ if (FLOAT128_IEEE_P (dest_mode))
+ {
+ if (src_mode == SFmode
+ || src_mode == DFmode
+ || FLOAT128_IBM_P (src_mode))
+ cvt = sext_optab;
+
+ else if (GET_MODE_CLASS (src_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ emit_move_insn (dest, gen_lowpart (dest_mode, src));
+ return;
+ }
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ if (dest_mode == SFmode
+ || dest_mode == DFmode
+ || FLOAT128_IBM_P (dest_mode))
+ cvt = trunc_optab;
+
+ else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+
+ else
+ gcc_unreachable ();
+ }
+
+ else
+ gcc_unreachable ();
+
+ gcc_assert (cvt != unknown_optab);
+ libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
+ gcc_assert (libfunc != NULL_RTX);
+
+ dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
+ src_mode);
+
+ gcc_assert (dest != NULL_RTX);
+ if (!rtx_equal_p (dest, dest2))
+ emit_move_insn (dest, dest2);
+
+ return;
+}
+
/* Emit the RTL for an sISEL pattern. */
void
@@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
|| ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
+ && !DECL_WEAK (decl)
&& (*targetm.binds_local_p) (decl))
|| (DEFAULT_ABI == ABI_V4
&& (!TARGET_SECURE_PLT
@@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
+ { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
+ false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
@@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base
/* A nonzero value indicates what kind of special handling for this
insn is required if doublewords are swapped. Undefined if
is_swappable is not set. */
- unsigned int special_handling : 3;
+ unsigned int special_handling : 4;
/* Set if the web represented by this entry cannot be optimized. */
unsigned int web_not_optimizable : 1;
/* Set if this insn should be deleted. */
@@ -34800,7 +35001,9 @@ enum special_handling_values {
SH_NOSWAP_LD,
SH_NOSWAP_ST,
SH_EXTRACT,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_CONCAT
};
/* Union INSN with all insns containing definitions that reach USE.
@@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
*special = SH_EXTRACT;
return 1;
}
+ /* An XXPERMDI is ok if we adjust the lanes. Note that if the
+ XXPERMDI is a swap operation, it will be identified by
+ insn_is_swap_p and therefore we won't get here. */
+ else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+ && (GET_MODE (XEXP (op, 0)) == V4DFmode
+ || GET_MODE (XEXP (op, 0)) == V4DImode)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 2
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+ && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+ {
+ *special = SH_XXPERMDI;
+ return 1;
+ }
else
return 0;
@@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
return 1;
}
+ /* A concatenation of two doublewords is ok if we reverse the
+ order of the inputs. */
+ if (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+ && (GET_MODE (SET_SRC (body)) == V2DFmode
+ || GET_MODE (SET_SRC (body)) == V2DImode))
+ {
+ *special = SH_CONCAT;
+ return 1;
+ }
+
/* Otherwise check the operands for vector lane violations. */
return rtx_is_swappable_p (body, special);
}
@@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn)
fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+ swap), reverse the order of the source operands and adjust the indices
+ of the source lanes to account for doubleword reversal. */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx select = XEXP (set, 1);
+ rtx concat = XEXP (select, 0);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ rtx parallel = XEXP (select, 1);
+ int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+ int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+ int new_lane0 = 3 - lane1;
+ int new_lane1 = 3 - lane0;
+ XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+ XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+ reverse the order of those inputs. */
+static void
+adjust_concat (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx concat = XEXP (set, 1);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
/* The insn described by INSN_ENTRY[I] can be swapped, but only
with special handling. Take care of that here. */
static void
@@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
/* Change the lane on a direct-splat operation. */
adjust_splat (insn);
break;
+ case SH_XXPERMDI:
+ /* Change the lanes on an XXPERMDI operation. */
+ adjust_xxpermdi (insn);
+ break;
+ case SH_CONCAT:
+ /* Reverse the order of a concatenation operation. */
+ adjust_concat (insn);
+ break;
}
}
@@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
fputs ("special:extract ", dump_file);
else if (insn_entry[i].special_handling == SH_SPLAT)
fputs ("special:splat ", dump_file);
+ else if (insn_entry[i].special_handling == SH_XXPERMDI)
+ fputs ("special:xxpermdi ", dump_file);
+ else if (insn_entry[i].special_handling == SH_CONCAT)
+ fputs ("special:concat ", dump_file);
}
if (insn_entry[i].web_not_optimizable)
fputs ("unoptimizable ", dump_file);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 527ad985423..cfdb286a2cb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -348,6 +348,8 @@
&& TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_FLOAT128")
+ (KF "TARGET_FLOAT128")
(DD "TARGET_DFP")
(TD "TARGET_DFP")])
@@ -365,9 +367,14 @@
(define_mode_iterator FMOVE32 [SF SD])
(define_mode_iterator FMOVE64 [DF DD])
(define_mode_iterator FMOVE64X [DI DF DD])
-(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
+(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)")
+ (IF "FLOAT128_2REG_P (IFmode)")
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+
; Iterators for 128 bit types for direct move
(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
(V16QI "")
@@ -376,7 +383,13 @@
(V4SF "")
(V2DI "")
(V2DF "")
- (V1TI "")])
+ (V1TI "")
+ (KF "")
+ (TF "")
+ (IF "")])
+
+; Iterator for 128-bit VSX types for pack/unpack
+(define_mode_iterator FMOVE128_VSX [V1TI KF])
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
@@ -432,6 +445,25 @@
; Iterator for just SF/DF
(define_mode_iterator SFDF [SF DF])
+; Iterator for float128 floating conversions
+(define_mode_iterator FLOAT128_SFDFTF [
+ (SF "TARGET_FLOAT128")
+ (DF "TARGET_FLOAT128")
+ (TF "FLOAT128_IBM_P (TFmode)")
+ (IF "TARGET_FLOAT128")])
+
+; Iterator for special 128-bit floating point. This is for non-default
+; conversions, so TFmode is not used here.
+(define_mode_iterator IFKF [IF KF])
+
+; Iterator for 128-bit floating point that uses the IBM double-double format
+(define_mode_iterator IBM128 [IF TF])
+
+; Iterator for 128-bit floating point
+(define_mode_iterator TFIFKF [(KF "TARGET_FLOAT128")
+ (IF "TARGET_FLOAT128")
+ (TF "TARGET_LONG_DOUBLE_128")])
+
; SF/DF suffix for traditional floating instructions
(define_mode_attr Ftrad [(SF "s") (DF "")])
@@ -596,7 +628,7 @@
;; Reload iterator for creating the function to allocate a base register to
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
- SF SD SI DF DD DI TI PTI])
+ SF SD SI DF DD DI TI PTI KF IF TF])
;; Start with fixed-point load and store insns. Here we put only the more
@@ -3037,15 +3069,15 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
-(define_insn_and_split "*and<mode>3_imm_dot_shifted"
- [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+(define_insn "*and<mode>3_imm_dot_shifted"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
(compare:CC
(and:GPR
- (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
- (match_operand:SI 4 "const_int_operand" "n,n"))
- (match_operand:GPR 2 "const_int_operand" "n,n"))
+ (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r")
+ (match_operand:SI 4 "const_int_operand" "n"))
+ (match_operand:GPR 2 "const_int_operand" "n"))
(const_int 0)))
- (clobber (match_scratch:GPR 0 "=r,r"))]
+ (clobber (match_scratch:GPR 0 "=r"))]
"logical_const_operand (GEN_INT (UINTVAL (operands[2])
<< INTVAL (operands[4])),
DImode)
@@ -3054,23 +3086,10 @@
&& rs6000_gen_cell_microcode"
{
operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4]));
- if (which_alternative == 0)
- return "andi%e2. %0,%1,%u2";
- else
- return "#";
+ return "andi%e2. %0,%1,%u2";
}
- "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
- [(set (match_dup 0)
- (and:GPR (lshiftrt:GPR (match_dup 1)
- (match_dup 4))
- (match_dup 2)))
- (set (match_dup 3)
- (compare:CC (match_dup 0)
- (const_int 0)))]
- ""
[(set_attr "type" "logical")
- (set_attr "dot" "yes")
- (set_attr "length" "4,8")])
+ (set_attr "dot" "yes")])
(define_insn "and<mode>3_mask"
@@ -3664,10 +3683,10 @@
; an insert instruction, in many cases.
(define_insn_and_split "*ior<mode>_mask"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
- (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "const_int_operand" "n")))]
- "can_create_pseudo_p ()
- && !logical_const_operand (operands[2], <MODE>mode)
+ (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0")
+ (match_operand:GPR 2 "const_int_operand" "n")))
+ (clobber (match_scratch:GPR 3 "=r"))]
+ "!logical_const_operand (operands[2], <MODE>mode)
&& rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)"
"#"
"&& 1"
@@ -3682,7 +3701,8 @@
{
int nb, ne;
rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode);
- operands[3] = gen_reg_rtx (<MODE>mode);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = GEN_INT (ne);
operands[5] = GEN_INT (~UINTVAL (operands[2]));
}
@@ -4216,19 +4236,18 @@
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
;; builtins.c and optabs.c that are not correct for IBM long double
;; when little-endian.
-(define_expand "signbittf2"
+(define_expand "signbit<mode>2"
[(set (match_dup 2)
- (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+ (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(subreg:DI (match_dup 2) 0))
(set (match_dup 4)
(match_dup 5))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(match_dup 6))]
- "!TARGET_IEEEQUAD
+ "FLOAT128_IBM_P (<MODE>mode)
&& TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)"
{
operands[2] = gen_reg_rtx (DFmode);
operands[3] = gen_reg_rtx (DImode);
@@ -6402,9 +6421,10 @@
;; problematical. Don't allow direct move for this case.
(define_insn_and_split "*mov<mode>_64bit_dm"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
+ && FLOAT128_2REG_P (<MODE>mode)
&& (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -6427,9 +6447,12 @@
[(set_attr "length" "8,8,8,8,12,12,8")])
(define_insn_and_split "*mov<mode>_32bit"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
+ && (FLOAT128_2REG_P (<MODE>mode)
+ || int_reg_operand_not_pseudo (operands[0], <MODE>mode)
+ || int_reg_operand_not_pseudo (operands[1], <MODE>mode))
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
"#"
@@ -6453,12 +6476,12 @@
(define_expand "extenddftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:DF 1 "input_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_extenddftf2 (operands[0], operands[1]));
else
emit_insn (gen_extenddftf2_fprs (operands[0], operands[1]));
@@ -6507,25 +6530,34 @@
(define_expand "extendsftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- rtx tmp = gen_reg_rtx (DFmode);
- emit_insn (gen_extendsfdf2 (tmp, operands[1]));
- emit_insn (gen_extenddftf2 (operands[0], tmp));
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else
+ {
+ rtx tmp = gen_reg_rtx (DFmode);
+ emit_insn (gen_extendsfdf2 (tmp, operands[1]));
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ }
DONE;
})
(define_expand "trunctfdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
- "")
+{
+ if (TARGET_IEEEQUAD)
+ {
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+ }
+})
(define_insn_and_split "trunctfdf2_internal1"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
@@ -6556,12 +6588,13 @@
(define_expand "trunctfsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1]));
else
emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1]));
@@ -6612,10 +6645,12 @@
(define_expand "fix_trunctfsi2"
[(set (match_operand:SI 0 "gpc_reg_operand" "")
(fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1]));
else
emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1]));
@@ -6663,20 +6698,73 @@
DONE;
})
-(define_expand "negtf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
- "")
+(define_expand "fix_trunctfdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunctf<mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "floatditf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float:TF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<mode>tf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (unsigned_float:TF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
+ "
+{
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ emit_insn (gen_ieee_128bit_vsx_neg<mode>2 (operands[0], operands[1]));
+ else
+ {
+ rtx libfunc = optab_libfunc (neg_optab, <MODE>mode);
+ rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST,
+ <MODE>mode, 1,
+ operands[1], <MODE>mode);
+
+ if (target && !rtx_equal_p (target, operands[0]))
+ emit_move_insn (operands[0], target);
+ }
+ DONE;
+ }
+}")
(define_insn "negtf2_internal"
[(set (match_operand:TF 0 "gpc_reg_operand" "=d")
(neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)"
"*
{
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
@@ -6687,16 +6775,29 @@
[(set_attr "type" "fp")
(set_attr "length" "8")])
-(define_expand "abstf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+(define_expand "abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
"
{
- rtx label = gen_label_rtx ();
+ rtx label;
+
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ {
+ emit_insn (gen_ieee_128bit_vsx_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+
+ label = gen_label_rtx ();
if (TARGET_E500_DOUBLE)
{
if (flag_finite_math_only && !flag_trapping_math)
@@ -6732,6 +6833,184 @@
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")
+
+
+;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector
+;; register
+
+(define_expand "ieee_128bit_negative_zero"
+ [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))]
+ "TARGET_FLOAT128"
+{
+ rtvec v = rtvec_alloc (16);
+ int i, high;
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = const0_rtx;
+
+ high = (BYTES_BIG_ENDIAN) ? 0 : 15;
+ RTVEC_ELT (v, high) = GEN_INT (0x80);
+
+ rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v));
+ DONE;
+})
+
+;; IEEE 128-bit negate
+
+;; We have 2 insns here for negate and absolute value. The first uses
+;; match_scratch so that phases like combine can recognize neg/abs as generic
+;; insns, and second insn after the first split pass loads up the bit to
+;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of
+;; neg/abs to create the constant just once.
+
+(define_insn_and_split "ieee_128bit_vsx_neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (neg:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_neg<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlxor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit absolute value
+(define_insn_and_split "ieee_128bit_vsx_abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_abs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit negative absolute value
+(define_insn_and_split "*ieee_128bit_vsx_nabs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Float128 conversion functions. These expand to library function calls.
+
+(define_expand "extend<FLOAT128_SFDFTF:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float_extend:IFKF
+ (match_operand:FLOAT128_SFDFTF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "trunc<IFKF:mode><FLOAT128_SFDFTF:mode>2"
+ [(set (match_operand:FLOAT128_SFDFTF 0 "nonimmediate_operand" "")
+ (float_truncate:FLOAT128_SFDFTF
+ (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fix_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "float<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float:KF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (unsigned_float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
;; Reload helper functions used by rs6000_secondary_reload. The patterns all
;; must have 3 arguments, and scratch register constraint must be a single
@@ -9516,7 +9795,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -12134,7 +12413,10 @@
;; Pack/unpack 128-bit floating point types that take 2 scalar registers
; Type of the 64-bit part when packing/unpacking 128-bit floating point types
-(define_mode_attr FP128_64 [(TF "DF") (TD "DI")])
+(define_mode_attr FP128_64 [(TF "DF")
+ (IF "DF")
+ (TD "DI")
+ (KF "DI")])
(define_expand "unpack<mode>"
[(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "")
@@ -12142,7 +12424,7 @@
[(match_operand:FMOVE128 1 "register_operand" "")
(match_operand:QI 2 "const_0_to_1_operand" "")]
UNSPEC_UNPACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"")
(define_insn_and_split "unpack<mode>_dm"
@@ -12151,7 +12433,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r")
(match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12175,7 +12457,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d")
(match_operand:QI 2 "const_0_to_1_operand" "i,i")]
UNSPEC_UNPACK_128BIT))]
- "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE"
+ "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12199,7 +12481,7 @@
[(match_operand:<FP128_64> 1 "register_operand" "0,d")
(match_operand:<FP128_64> 2 "register_operand" "d,d")]
UNSPEC_PACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"@
fmr %L0,%2
#"
@@ -12219,12 +12501,12 @@
[(set_attr "type" "fp,fp")
(set_attr "length" "4,8")])
-(define_insn "unpackv1ti"
+(define_insn "unpack<mode>"
[(set (match_operand:DI 0 "register_operand" "=d,d")
- (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa")
+ (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa")
(match_operand:QI 2 "const_0_to_1_operand" "O,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_VSX"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0)
return ASM_COMMENT_START " xxpermdi to same register";
@@ -12232,19 +12514,17 @@
operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3);
return "xxpermdi %x0,%x1,%x1,%3";
}
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
-(define_insn "packv1ti"
- [(set (match_operand:V1TI 0 "register_operand" "=wa")
- (unspec:V1TI
+(define_insn "pack<mode>"
+ [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa")
+ (unspec:FMOVE128_VSX
[(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_PACK_128BIT))]
"TARGET_VSX"
"xxpermdi %x0,%x1,%x2,0"
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 18ea27a3d90..6d11ff7dfdb 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -212,7 +212,7 @@ Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save
; Allow/disallow the movmisalign in DF/DI vectors
mefficient-unaligned-vector
-Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1)
+Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags)
; Consider unaligned VSX accesses to be efficient/inefficient
mallow-df-permute
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index 24618e309f1..f48af43e7c5 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{R*} \
%(link_shlib) \
%{!T*: %(link_start) } \
-%(link_target) \
%(link_os)"
/* Shared libraries are not default. */
@@ -584,10 +583,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{shared:-G -dy -z text } \
%{symbolic:-Bsymbolic -G -dy -z text }"
-/* Override the default target of the linker. */
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT("", " --oformat elf32-powerpcle", "")
-
/* Any specific OS flags. */
#define LINK_OS_SPEC "\
%{mads : %(link_os_ads) ; \
@@ -873,7 +868,6 @@ ncrtn.o%s"
{ "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \
{ "endfile_default", ENDFILE_DEFAULT_SPEC }, \
{ "link_shlib", LINK_SHLIB_SPEC }, \
- { "link_target", LINK_TARGET_SPEC }, \
{ "link_start", LINK_START_SPEC }, \
{ "link_start_ads", LINK_START_ADS_SPEC }, \
{ "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \
diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h
index 7b1d6a1b4de..66ee7cadfe4 100644
--- a/gcc/config/rs6000/sysv4le.h
+++ b/gcc/config/rs6000/sysv4le.h
@@ -25,10 +25,6 @@
#undef DEFAULT_ASM_ENDIAN
#define DEFAULT_ASM_ENDIAN " -mlittle"
-#undef LINK_TARGET_SPEC
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT(" --oformat elf32-powerpc", "", "")
-
#undef MULTILIB_DEFAULTS
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4a62fbbbdd4..8821dec5989 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -977,6 +977,8 @@
;; General shift amounts can be supported using vsro + vsr. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as
+;; 'shift toward element 0' so is a shr for LE and shl for BE.
(define_expand "vec_shr_<mode>"
[(match_operand:VEC_L 0 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")
@@ -987,6 +989,7 @@
rtx bitshift = operands[2];
rtx shift;
rtx insn;
+ rtx zero_reg, op1, op2;
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
@@ -996,19 +999,29 @@
if (bitshift_val & 0x7)
FAIL;
byteshift_val = (bitshift_val >> 3);
+ zero_reg = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode));
if (!BYTES_BIG_ENDIAN)
- byteshift_val = 16 - byteshift_val;
+ {
+ byteshift_val = 16 - byteshift_val;
+ op1 = zero_reg;
+ op2 = operands[1];
+ }
+ else
+ {
+ op1 = operands[1];
+ op2 = zero_reg;
+ }
+
if (TARGET_VSX && (byteshift_val & 0x3) == 0)
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
- insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift);
}
else
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift);
}
emit_insn (insn);
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 8b124759b0b..6faf7719a9a 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -2315,7 +2315,7 @@
emit_move_insn (str1, force_operand (XEXP (operands[1], 0), NULL_RTX));
emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
- emit_move_insn (len, force_operand (operands[3], NULL_RTX));
+ emit_move_insn (len, operands[3]);
emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
DONE;
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 0a24da9bcb1..b267b04e2a7 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -438,15 +438,15 @@ B_DEF (s390_vllezf, vec_insert_and_zerov4si,0,
B_DEF (s390_vllezg, vec_insert_and_zerov2di,0, B_VX, 0, BT_FN_UV2DI_ULONGLONGCONSTPTR)
OB_DEF (s390_vec_load_bndry, s390_vec_load_bndry_s8,s390_vec_load_bndry_dbl,B_VX, BT_FN_OV4SI_INTCONSTPTR_INT)
-OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U3, BT_OV_V16QI_SCHARCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U3, BT_OV_UV16QI_UCHARCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U3, BT_OV_V8HI_SHORTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U3, BT_OV_UV8HI_USHORTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U3, BT_OV_V4SI_INTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U3, BT_OV_UV4SI_UINTCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U3, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U3, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT)
-OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U3, BT_OV_V2DF_DBLCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s8, s390_vlbb, O2_U16, BT_OV_V16QI_SCHARCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u8, s390_vlbb, O2_U16, BT_OV_UV16QI_UCHARCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s16, s390_vlbb, O2_U16, BT_OV_V8HI_SHORTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u16, s390_vlbb, O2_U16, BT_OV_UV8HI_USHORTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s32, s390_vlbb, O2_U16, BT_OV_V4SI_INTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u32, s390_vlbb, O2_U16, BT_OV_UV4SI_UINTCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_s64, s390_vlbb, O2_U16, BT_OV_V2DI_LONGLONGCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_u64, s390_vlbb, O2_U16, BT_OV_UV2DI_ULONGLONGCONSTPTR_USHORT)
+OB_DEF_VAR (s390_vec_load_bndry_dbl, s390_vlbb, O2_U16, BT_OV_V2DF_DBLCONSTPTR_USHORT)
B_DEF (s390_vlbb, vlbb, 0, B_VX, O2_U3, BT_FN_UV16QI_UCHARCONSTPTR_USHORT)
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 5814694adbc..cbfc80073c9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2258,23 +2258,14 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
unsigned HOST_WIDE_INT mask;
int length, size;
+ rtx elt;
- if (!VECTOR_MODE_P (GET_MODE (op))
- || GET_CODE (op) != CONST_VECTOR
- || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ if (!const_vec_duplicate_p (op, &elt)
+ || !CONST_INT_P (elt))
return false;
- if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
- {
- int i;
-
- for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
- if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
- return false;
- }
-
size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
- mask = UINTVAL (XVECEXP (op, 0, 0));
+ mask = UINTVAL (elt);
if (s390_contiguous_bitmask_p (mask, size, start,
end != NULL ? &length : NULL))
{
@@ -10360,6 +10351,7 @@ s390_emit_prologue (void)
current_function_name(), cfun_frame_layout.frame_size,
s390_stack_size);
emit_insn (gen_trap ());
+ emit_barrier ();
}
else
{
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 5a552e2be81..3e4211be4de 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -159,6 +159,7 @@ extern int sh_eval_treg_value (rtx op);
extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a);
extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx);
+extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool);
/* Result value of sh_find_set_of_reg. */
struct set_of_reg
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 450d634e246..1442b7fc790 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -1604,6 +1604,10 @@ sh_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, XVECEXP (x, 0, 0));
fputs ("@GOTPLT", file);
break;
+ case UNSPEC_PCREL:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@PCREL", file);
+ break;
case UNSPEC_DTPOFF:
output_addr_const (file, XVECEXP (x, 0, 0));
fputs ("@DTPOFF", file);
@@ -10441,6 +10445,7 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_DTPOFF
|| XINT (x, 1) == UNSPEC_TPOFF
|| XINT (x, 1) == UNSPEC_PLT
+ || XINT (x, 1) == UNSPEC_PCREL
|| XINT (x, 1) == UNSPEC_SYMOFF
|| XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
return false;
@@ -10714,7 +10719,8 @@ sh_delegitimize_address (rtx orig_x)
rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
if (GET_CODE (symplt) == UNSPEC
- && XINT (symplt, 1) == UNSPEC_PLT)
+ && (XINT (symplt, 1) == UNSPEC_PLT
+ || XINT (symplt, 1) == UNSPEC_PCREL))
return XVECEXP (symplt, 0, 0);
}
}
@@ -11702,9 +11708,24 @@ sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|| crtl->args.info.stack_regs == 0)
&& ! sh_cfun_interrupt_handler_p ()
&& (! flag_pic
- || (decl && ! TREE_PUBLIC (decl))
+ || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
|| (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
}
+
+/* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
+void
+sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
+{
+ const_tree decl = SYMBOL_REF_DECL (sym);
+ bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
+
+ if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
+ emit_insn (gen_sym_label2reg (reg, sym, lab));
+ else if (sibcall_p)
+ emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
+ else
+ emit_insn (gen_symPLT_label2reg (reg, sym, lab));
+}
/* Machine specific built-in functions. */
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index ad49f72c68d..4e7cd169f84 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -135,6 +135,7 @@
UNSPEC_PLT
UNSPEC_CALLER
UNSPEC_GOTPLT
+ UNSPEC_PCREL
UNSPEC_ICACHE
UNSPEC_INIT_TRAMP
UNSPEC_FCOSA
@@ -9470,11 +9471,8 @@ label:
[(const_int 0)]
{
rtx lab = PATTERN (gen_call_site ());
-
- if (SYMBOL_REF_LOCAL_P (operands[0]))
- emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
- else
- emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab));
+
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, false);
emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab)));
DONE;
}
@@ -9605,10 +9603,7 @@ label:
{
rtx lab = PATTERN (gen_call_site ());
- if (SYMBOL_REF_LOCAL_P (operands[1]))
- emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
- else
- emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab));
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, false);
emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3],
operands[2], copy_rtx (lab)));
DONE;
@@ -10008,7 +10003,7 @@ label:
rtx lab = PATTERN (gen_call_site ());
rtx call_insn;
- emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ sh_expand_sym_label2reg (operands[2], operands[0], lab, true);
call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1],
copy_rtx (lab)));
SIBLING_CALL_P (call_insn) = 1;
@@ -10200,7 +10195,7 @@ label:
rtx lab = PATTERN (gen_call_site ());
rtx call_insn;
- emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ sh_expand_sym_label2reg (operands[3], operands[1], lab, true);
call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0],
operands[3],
operands[2],
@@ -10748,6 +10743,16 @@ label:
UNSPEC_SYMOFF)))]
"TARGET_SH1" "")
+(define_expand "symPCREL_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI
+ (unspec:SI
+ [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PCREL))
+ (const:SI (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))] UNSPEC_PCREL_SYMOFF)))]
+ "TARGET_SH1"
+ "")
+
(define_expand "symGOT_load"
[(set (match_dup 2) (match_operand 1 "" ""))
(set (match_dup 3) (plus (match_dup 2) (reg PIC_REG)))
@@ -12731,7 +12736,7 @@ label:
[(set (match_operand:SI 0 "register_operand")
(compare:SI (match_operand:BLK 1 "memory_operand")
(match_operand:BLK 2 "memory_operand")))
- (use (match_operand:SI 3 "immediate_operand"))
+ (use (match_operand:SI 3 "nonmemory_operand"))
(use (match_operand:SI 4 "immediate_operand"))]
"TARGET_SH1 && optimize"
{
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 9665ee6da9b..5b9f0517b90 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -6403,7 +6403,7 @@
/* Pass constm1 to indicate that it may expect a structure value, but
we don't know what size it is. */
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, constm1_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, constm1_rtx));
/* Save the function value registers. */
emit_move_insn (adjust_address (result, DImode, 0), valreg1);
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index ca762877a0f..05c81f5ed73 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -3185,11 +3185,8 @@ classify_immediate (rtx op, machine_mode mode)
&& mode == V4SImode
&& GET_CODE (op) == CONST_VECTOR
&& GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
- && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
- op = CONST_VECTOR_ELT (op, 0);
+ && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
+ op = unwrap_const_vec_duplicate (op);
switch (GET_CODE (op))
{
@@ -3507,9 +3504,7 @@ spu_legitimate_constant_p (machine_mode mode, rtx x)
&& (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
|| GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
- return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
- && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
- && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
+ return const_vec_duplicate_p (x);
if (GET_CODE (x) == CONST_VECTOR
&& !const_vector_immediate_p (x))
diff --git a/gcc/config/tilegx/constraints.md b/gcc/config/tilegx/constraints.md
index 783e1ca98fe..f47d0f68296 100644
--- a/gcc/config/tilegx/constraints.md
+++ b/gcc/config/tilegx/constraints.md
@@ -96,21 +96,14 @@
"An 8-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 8")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)")))
+ (match_test "const_vec_duplicate_p (op)")))
(define_constraint "Y"
"A 4-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))
+ (match_test "const_vec_duplicate_p (op)")))
+
(define_constraint "Z0"
"The integer constant 0xffffffff"
(and (match_code "const_int")
diff --git a/gcc/config/tilegx/predicates.md b/gcc/config/tilegx/predicates.md
index 4cbebf18a91..ce04660f9ed 100644
--- a/gcc/config/tilegx/predicates.md
+++ b/gcc/config/tilegx/predicates.md
@@ -112,14 +112,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 8
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 4)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 5)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 6)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 7)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if OP is a 4-element vector constant with identical signed
;; 8-bit elements or any register.
@@ -127,10 +121,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if the operand is a valid second operand to an add insn.
(define_predicate "add_operand"
diff --git a/gcc/config/tilegx/tilegx.md b/gcc/config/tilegx/tilegx.md
index 75322e16721..944953c34b2 100644
--- a/gcc/config/tilegx/tilegx.md
+++ b/gcc/config/tilegx/tilegx.md
@@ -2670,7 +2670,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/tilepro/constraints.md b/gcc/config/tilepro/constraints.md
index 4d13fb0640a..3ab9ab75650 100644
--- a/gcc/config/tilepro/constraints.md
+++ b/gcc/config/tilepro/constraints.md
@@ -90,12 +90,10 @@
"A 4-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)")))
+ (match_test "const_vec_duplicate_p (op)")))
(define_constraint "Y"
"A 2-element vector constant with identical elements"
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 2")
- (match_test "CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)")))
+ (match_test "const_vec_duplicate_p (op)")))
diff --git a/gcc/config/tilepro/predicates.md b/gcc/config/tilepro/predicates.md
index 00d2bb989cd..ab62d20731a 100644
--- a/gcc/config/tilepro/predicates.md
+++ b/gcc/config/tilepro/predicates.md
@@ -75,10 +75,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 4
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 2)
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 3)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if OP is a 2-element vector constant with identical signed
;; 8-bit elements or any register.
@@ -86,8 +84,8 @@
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
(match_test "CONST_VECTOR_NUNITS (op) == 2
- && satisfies_constraint_I (CONST_VECTOR_ELT (op, 0))
- && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)"))))
+ && (satisfies_constraint_I
+ (unwrap_const_vec_duplicate (op)))"))))
;; Return 1 if the operand is a valid second operand to an add insn.
(define_predicate "add_operand"
diff --git a/gcc/config/tilepro/tilepro.md b/gcc/config/tilepro/tilepro.md
index a97ebf9eb22..b1e6b81e71f 100644
--- a/gcc/config/tilepro/tilepro.md
+++ b/gcc/config/tilepro/tilepro.md
@@ -1516,7 +1516,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
diff --git a/gcc/config/visium/visium.md b/gcc/config/visium/visium.md
index 969cb887a6c..370b6a4b5b2 100644
--- a/gcc/config/visium/visium.md
+++ b/gcc/config/visium/visium.md
@@ -2375,7 +2375,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{