97 files changed, 1937 insertions, 502 deletions
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index b8d0ba6b69e..10893324d3f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
 AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
 
 /* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor",      falkor,    cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
+AARCH64_CORE("falkor",      falkor,    falkor,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx",     qdf24xx,   falkor,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx,   0x51, 0xC00, -1)
 
 /* Samsung ('S') cores. */
 AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index c4f059ab7c5..a989a2ec23e 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -65,6 +65,6 @@ AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
 
 /* Enabling "rdma" also enables "fp", "simd".
    Disabling "rdma" just disables "rdma".  */
-AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma")
+AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "asimdrdm")
 
 #undef AARCH64_OPT_EXTENSION
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 011fcec0795..f74b68775cf 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -351,6 +351,35 @@
   }
 )
 
+(define_expand "xorsign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  /* op0 = op1 ^ (op2 & mask) in IMODE; mask has each lane's top bit set.  */
+  machine_mode imode = <V_cmp_result>mode;
+  rtx v_bitmask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
+  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
+
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+						     HOST_WIDE_INT_M1U << bits));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
@@ -5617,9 +5646,9 @@
   DONE;
 })
 
-;; Standard pattern name vec_init<mode>.
+;; Standard pattern name vec_init<mode><Vel>.
 
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><Vel>"
   [(match_operand:VALL_F16 0 "register_operand" "")
    (match_operand 1 "" "")]
   "TARGET_SIMD"
@@ -5674,9 +5703,9 @@
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
 
-;; Standard pattern name vec_extract<mode>.
+;; Standard pattern name vec_extract<mode><Vel>.
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><Vel>"
   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
    (match_operand:VALL_F16 1 "register_operand" "")
    (match_operand:SI 2 "immediate_operand" "")]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 055ebafb830..28c4e0e6476 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -33,6 +33,7 @@
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
@@ -3069,7 +3070,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
   reg = gen_rtx_REG (mode, regno);
   mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
 			    plus_constant (Pmode, base_rtx, -adjustment));
-  mem = gen_rtx_MEM (mode, mem);
+  mem = gen_frame_mem (mode, mem);
 
   insn = emit_move_insn (mem, reg);
   RTX_FRAME_RELATED_P (insn) = 1;
@@ -3157,7 +3158,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
     {
       rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
       mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
-      emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
+      emit_move_insn (reg1, gen_frame_mem (mode, mem));
     }
   else
     {
@@ -3233,8 +3234,6 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 			   unsigned start, unsigned limit, bool skip_wb)
 {
   rtx_insn *insn;
-  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
-						 ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
 
@@ -3255,8 +3254,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 
       reg = gen_rtx_REG (mode, regno);
       offset = start_offset + cfun->machine->frame.reg_offset[regno];
-      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-					      offset));
+      mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+						offset));
 
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
@@ -3270,8 +3269,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 	  rtx mem2;
 
 	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-						   offset));
+	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+						     offset));
 	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
 						    reg2));
 
@@ -3300,8 +3299,6 @@ aarch64_restore_callee_saves (machine_mode mode,
 			      unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
   rtx base_rtx = stack_pointer_rtx;
-  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
-						 ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
   HOST_WIDE_INT offset;
@@ -3322,7 +3319,7 @@ aarch64_restore_callee_saves (machine_mode mode,
 
       reg = gen_rtx_REG (mode, regno);
       offset = start_offset + cfun->machine->frame.reg_offset[regno];
-      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
 
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
@@ -3335,7 +3332,7 @@ aarch64_restore_callee_saves (machine_mode mode,
 	  rtx mem2;
 
 	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
 	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
 
 	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
@@ -4733,9 +4730,14 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
 		  CONST_DOUBLE_REAL_VALUE (value),
 		  REAL_MODE_FORMAT (mode));
 
-  ival = zext_hwi (res[0], 32);
-  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (DFmode))
-    ival |= (zext_hwi (res[1], 32) << 32);
+  if (mode == DFmode)
+    {
+      int order = BYTES_BIG_ENDIAN ? 1 : 0;
+      ival = zext_hwi (res[order], 32);
+      ival |= (zext_hwi (res[1 - order], 32) << 32);
+    }
+  else
+      ival = zext_hwi (res[0], 32);
 
   *intval = ival;
   return true;
@@ -4787,10 +4789,6 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
   if (!TARGET_SIMD)
      return false;
 
-  /* We make a general exception for 0.  */
-  if (aarch64_float_const_zero_rtx_p (x))
-      return true;
-
   machine_mode vmode, imode;
   unsigned HOST_WIDE_INT ival;
 
@@ -4800,6 +4798,10 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
       if (!aarch64_reinterpret_float_as_int (x, &ival))
 	return false;
 
+      /* We make a general exception for 0.  */
+      if (aarch64_float_const_zero_rtx_p (x))
+	return true;
+
       imode = int_mode_for_mode (mode);
     }
   else if (GET_CODE (x) == CONST_INT
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fc799479c81..64b60a903ed 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -231,6 +231,7 @@
 (include "../arm/cortex-a53.md")
 (include "../arm/cortex-a57.md")
 (include "../arm/exynos-m1.md")
+(include "falkor.md")
 (include "thunderx.md")
 (include "../arm/xgene1.md")
 (include "thunderx2t99.md")
@@ -1076,7 +1077,7 @@
   [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r")
 	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_float (operands[1], HFmode))"
+    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
   "@
    movi\\t%0.4h, #0
    fmov\\t%h0, %w1
@@ -1099,7 +1100,7 @@
   [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
 	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
-    || aarch64_reg_or_fp_float (operands[1], SFmode))"
+    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
   "@
    movi\\t%0.2s, #0
    fmov\\t%s0, %w1
@@ -1123,7 +1124,7 @@
   [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
 	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
-    || aarch64_reg_or_fp_float (operands[1], DFmode))"
+    || aarch64_reg_or_fp_zero (operands[1], DFmode))"
   "@
    movi\\t%d0, #0
    fmov\\t%d0, %x1
@@ -5178,6 +5179,42 @@
 }
 )
 
+;; For xorsign (x, y), we want to generate:
+;;
+;; LDR   d2, #1<<63
+;; AND   v3.8B, v1.8B, v2.8B
+;; EOR   v0.8B, v0.8B, v3.8B
+;;
+
+(define_expand "xorsign<mode>3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+  /* op0 = op1 ^ (op2 & mask) in IMODE; mask has the mode's top bit set.  */
+  machine_mode imode = <V_cmp_result>mode;
+  rtx mask = gen_reg_rtx (imode);
+  rtx op1x = gen_reg_rtx (imode);
+  rtx op2x = gen_reg_rtx (imode);
+
+  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
+  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
+						     imode)));
+
+  emit_insn (gen_and<v_cmp_result>3 (op2x, mask,
+				     lowpart_subreg (imode, operands[2],
+						     <MODE>mode)));
+  emit_insn (gen_xor<v_cmp_result>3 (op1x,
+				     lowpart_subreg (imode, operands[1],
+						     <MODE>mode),
+				     op2x));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op1x, imode));
+  DONE;
+}
+)
+
 ;; -------------------------------------------------------------------
 ;; Reload support
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md
new file mode 100644
index 00000000000..b422ab30c44
--- /dev/null
+++ b/gcc/config/aarch64/falkor.md
@@ -0,0 +1,681 @@
+;; Falkor pipeline description
+;; Copyright (C) 2017 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "falkor")
+
+;; Complex int instructions (e.g. multiply and divide) execute in the X
+;; pipeline.  Simple int instructions execute in the X, Y, and Z pipelines.
+
+(define_cpu_unit "falkor_x" "falkor")
+(define_cpu_unit "falkor_y" "falkor")
+(define_cpu_unit "falkor_z" "falkor")
+
+;; Branches execute in the B pipeline or in one of the int pipelines depending
+;; on how complex they are.  Simple int insns (like movz) can also execute here.
+
+(define_cpu_unit "falkor_b" "falkor")
+
+;; Vector and FP insns execute in the VX and VY pipelines.
+
+(define_automaton "falkor_vfp")
+
+(define_cpu_unit "falkor_vx" "falkor_vfp")
+(define_cpu_unit "falkor_vy" "falkor_vfp")
+
+;; Loads execute in the LD pipeline.
+;; Stores execute in the ST, SD, and VSD pipelines, for address, data, and
+;; vector data.
+
+(define_automaton "falkor_mem")
+
+(define_cpu_unit "falkor_ld" "falkor_mem")
+(define_cpu_unit "falkor_st" "falkor_mem")
+(define_cpu_unit "falkor_sd" "falkor_mem")
+(define_cpu_unit "falkor_vsd" "falkor_mem")
+
+;; The GTOV and VTOG pipelines are for general to vector reg moves, and vice
+;; versa.
+
+(define_cpu_unit "falkor_gtov" "falkor")
+(define_cpu_unit "falkor_vtog" "falkor")
+
+;; Common reservation combinations.
+
+(define_reservation "falkor_vxvy" "falkor_vx|falkor_vy")
+(define_reservation "falkor_zb"   "falkor_z|falkor_b")
+(define_reservation "falkor_xyz"  "falkor_x|falkor_y|falkor_z")
+(define_reservation "falkor_xyzb" "falkor_x|falkor_y|falkor_z|falkor_b")
+
+;; SIMD Floating-Point Instructions
+
+(define_insn_reservation "falkor_afp_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_neg_s,neon_fp_neg_d,neon_fp_abs_s,neon_fp_abs_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_1_vxvy_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_neg_s_q,neon_fp_neg_d_q,neon_fp_abs_s_q,neon_fp_abs_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_2_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_minmax_s,neon_fp_minmax_d,neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,neon_fp_compare_s,neon_fp_compare_d,neon_fp_round_s,neon_fp_round_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_2_vxvy_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q,neon_fp_compare_s_q,neon_fp_compare_d_q,neon_fp_round_s_q,neon_fp_round_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_3_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,neon_fp_abd_s,neon_fp_abd_d,neon_fp_addsub_s,neon_fp_addsub_d,neon_fp_reduc_add_s,neon_fp_reduc_add_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_3_vxvy_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_abd_s_q,neon_fp_abd_d_q,neon_fp_addsub_s_q,neon_fp_addsub_d_q,neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_to_int_s,neon_fp_to_int_d,neon_int_to_fp_s,neon_int_to_fp_d,neon_fp_cvt_widen_h,neon_fp_cvt_widen_s"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_to_int_s_q,neon_fp_to_int_d_q,neon_int_to_fp_s_q,neon_int_to_fp_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_mul" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_s_scalar"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_mla" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_scalar"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mul" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mul_s_q,neon_fp_mul_s_scalar_q"))
+  "falkor_vxvy+falkor_vxvy")  ;; Q form: two vxvy reservations, like the other _q types.
+
+(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mla" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mla_s_q,neon_fp_mla_s_scalar_q"))
+  "falkor_vxvy+falkor_vxvy")  ;; Q form: two vxvy reservations, like the other _q types.
+
+(define_insn_reservation "falkor_afp_6_vxvy_mul" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mul_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_mla" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mla_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mul" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mul_d_q,neon_fp_mul_d_scalar_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mla" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_mla_d_q,neon_fp_mla_d_scalar_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy_vxvy_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q"))
+  "falkor_vxvy+falkor_vxvy,falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vx_vy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_div_s"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_11_vx_vy" 11
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_div_d"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_6_vx_vy_vx_vy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_div_s_q"))
+  "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_11_vx_vy_vx_vy" 11
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_div_d_q"))
+  "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_12_vx_vy" 12
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_sqrt_s"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_22_vx_vy" 22
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_sqrt_d"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_12_vx_vy_vx_vy" 12
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_sqrt_s_q"))
+  "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_22_vx_vy_vx_vy" 22
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_sqrt_d_q"))
+  "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+;; SIMD Integer Instructions
+
+(define_insn_reservation "falkor_ai_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_add,neon_reduc_add,neon_logic,neon_neg,neon_sub"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_1_vxvy_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_shift_imm_long,neon_add_q,neon_reduc_add_q,neon_logic_q,neon_neg_q,neon_sub_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_2_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_add_long,neon_sub_long,neon_add_halve,neon_sub_halve,neon_shift_imm,neon_shift_reg,neon_minmax,neon_abs,neon_compare,neon_compare_zero,neon_tst"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_2_vxvy_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_add_halve_q,neon_sub_halve_q,neon_shift_imm_q,neon_shift_reg_q,neon_minmax_q,neon_abs_q,neon_compare_q,neon_compare_zero_q,neon_tst_q,neon_reduc_add_long"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_3_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_shift_acc,neon_reduc_add_acc,neon_abd,neon_qadd,neon_qsub,neon_qabs,neon_qneg,neon_sat_shift_imm,neon_sat_shift_imm_narrow_q,neon_sat_shift_reg,neon_reduc_minmax"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_reduc_minmax_q"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_3_vxvy_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_shift_acc_q,neon_reduc_add_acc_q,neon_abd_q,neon_abd_long,neon_qadd_q,neon_qsub_q,neon_qabs_q,neon_qneg_q,neon_sat_shift_imm_q,neon_sat_shift_reg_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_mul" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_mul_b,neon_mul_h,neon_mul_s,neon_mul_h_scalar,neon_mul_s_scalar,neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,neon_sat_mul_h_scalar,neon_sat_mul_s_scalar"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_mla" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_mla_b,neon_mla_h,neon_mla_s,neon_mla_h_scalar,neon_mla_s_scalar"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mul" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,neon_mul_h_scalar_q,neon_mul_s_scalar_q,neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,neon_mul_d_long,neon_mul_h_scalar_long,neon_mul_s_scalar_long,neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mla" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,neon_mla_h_scalar_q,neon_mla_s_scalar_q,neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,neon_mla_h_scalar_long,neon_mla_s_scalar_long,neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_add_halve_narrow_q,neon_sub_halve_narrow_q,neon_arith_acc"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_2_ai_vxvy_vxvy_vxvy_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_add_widen,neon_sub_widen"))
+  "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+(define_insn_reservation "falkor_4_ai_vxvy_vxvy_vxvy_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_arith_acc_q"))
+  "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+;; SIMD Load Instructions
+
+(define_insn_reservation "falkor_ald_4_ld" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,neon_load1_all_lanes,neon_load2_one_lane"))
+  "falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_none" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_2reg,neon_load2_2reg,neon_load2_all_lanes"))
+  "falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_2reg_q,neon_load2_2reg_q,neon_load2_all_lanes_q,neon_load3_one_lane,neon_load4_one_lane,neon_ldp,neon_ldp_q"))
+  "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_none" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_3reg,neon_load3_3reg,neon_load3_all_lanes"))
+  "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_ld" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_3reg_q,neon_load3_3reg_q,neon_load3_all_lanes_q"))
+  "falkor_ld,falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_none_none" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_4reg,neon_load4_4reg"))
+  "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_ld_ld" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_load1_4reg_q,neon_load4_4reg_q,neon_load4_all_lanes,neon_load4_all_lanes_q"))
+  "falkor_ld,falkor_ld,falkor_ld,falkor_ld")
+
+;; Arithmetic and Logical Instructions
+
+(define_insn_reservation "falkor_alu_1_xyz" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg"))
+  "falkor_xyz")
+
+;; SIMD Miscellaneous Instructions
+
+;; No separate type for ins and dup.  But this is correct for both.
+
+(define_insn_reservation "falkor_am_3_gtov" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_from_gp"))
+  "falkor_gtov")
+
+;; No separate type for ins and dup.  Assuming dup is more common.  Ins is
+;; gtov+vxvy with a latency of 4.
+
+(define_insn_reservation "falkor_am_3_gtov_gtov" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_from_gp_q"))
+  "falkor_gtov,falkor_gtov")
+
+;; neon_to_gp_q is used for 32-bit ARM instructions that move 64 bits of data,
+;; so it is not needed here.
+
+(define_insn_reservation "falkor_am_3_vtog" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_to_gp"))
+  "falkor_vtog")
+
+(define_insn_reservation "falkor_am_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_bsl,neon_dup,neon_ext,neon_ins,neon_ins_q,neon_move,neon_rev,neon_tbl1,neon_permute,neon_shift_imm_narrow_q"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_1_vxvy_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_bsl_q,neon_dup_q,neon_ext_q,neon_move_q,neon_rev_q,neon_tbl1_q,neon_permute_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_2_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_cls,neon_cnt,neon_rbit"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_4_vxvy_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_cls_q,neon_cnt_q,neon_rbit_q,neon_tbl2"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_3_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recpe_s,neon_fp_recpe_d,neon_fp_rsqrte_s,neon_fp_rsqrte_d,neon_fp_recpx_s,neon_fp_recpx_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_3_vxvy_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recpe_s_q,neon_fp_recpe_d_q,neon_fp_rsqrte_s_q,neon_fp_rsqrte_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recps_s"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy_vxvy" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recps_s_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recps_d,neon_fp_rsqrts_d"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy_vxvy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_fp_recps_d_q,neon_fp_rsqrts_d_q"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy_vxvy_vxvy" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_tbl2_q,neon_tbl3"))
+  "(falkor_vxvy+falkor_vxvy),falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy_vxvy_vxvy_vxvy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_tbl3_q,neon_tbl4"))
+  "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+(define_insn_reservation "falkor_am_7_vxvy_vxvy_vxvy_vxvy_vxvy" 7
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_tbl4_q"))
+  "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy),falkor_vxvy")
+
+;; SIMD Store Instructions
+
+;; ??? stp is neon_store1_2reg in aarch64.md, but neon_stp in aarch64-simd.md.
+;; Similarly with ldp.
+
+(define_insn_reservation "falkor_ast_st_vsd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,neon_store1_one_lane,neon_store1_one_lane_q,neon_store1_2reg,neon_store2_2reg,neon_store2_one_lane,neon_store2_one_lane_q,neon_stp"))
+  "falkor_st+falkor_vsd")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_store1_2reg_q,neon_store1_3reg,neon_store1_4reg,neon_store2_2reg_q,neon_store3_3reg,neon_store4_4reg,neon_store3_one_lane,neon_store3_one_lane_q,neon_store4_one_lane,neon_store4_one_lane_q,neon_stp_q"))
+  "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_store1_3reg_q,neon_store3_3reg_q"))
+  "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd_st_vsd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "neon_store1_4reg_q,neon_store4_4reg_q"))
+  "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+;; Branch Instructions
+
+(define_insn_reservation "falkor_branch_0_zb" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "branch"))
+  "falkor_zb")
+
+(define_insn_reservation "falkor_call_0_xyzb" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "call"))
+  "falkor_xyzb")
+
+;; Cryptography Extensions
+
+(define_insn_reservation "falkor_cry_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_sha1_fast"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_2_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_aesmc"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_2_vxvy_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_sha1_xor,crypto_sha256_fast,crypto_pmull"))
+  "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_4_vy_vx" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_sha1_slow"))
+  "falkor_vy+falkor_vx")
+
+(define_insn_reservation "falkor_cry_6_vy_vx" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_sha256_slow"))
+  "falkor_vy+falkor_vx")
+
+(define_insn_reservation "falkor_cry_3_vxvy_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crypto_aese"))
+  "falkor_vxvy+falkor_vxvy")
+
+;; FP Load Instructions
+
+(define_insn_reservation "falkor_fld_4_ld" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_loads,f_loadd"))
+  "falkor_ld")
+
+;; No separate FP store section, these are found in the SIMD store section.
+
+(define_insn_reservation "falkor_fld_0_st_vsd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_stores,f_stored"))
+  "falkor_st+falkor_vsd")
+
+;; FP Data Processing Instructions
+
+(define_insn_reservation "falkor_fpdt_0_vxvy" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vtog" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_cvtf2i"))
+  "falkor_vtog")
+
+(define_insn_reservation "falkor_fpdt_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "ffariths,ffarithd,fcsel"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_2_vxvy" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_minmaxd,f_minmaxs,f_rintd,f_rints"))
+  "falkor_vxvy")
+
+;; Scalar FP ABD is handled the same as vector FP ABD.
+
+(define_insn_reservation "falkor_fpdt_3_vxvy" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "faddd,fadds"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_4_vxvy" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_cvt"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vxvy_mul" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fmuls"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vxvy_mla" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fmacs,ffmas"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vxvy_mul" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fmuld"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vxvy_mla" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fmacd,ffmad"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vx_vy" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fdivs"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_fpdt_11_vx_vy" 11
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fdivd"))
+  "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_fpdt_12_vx_vy" 12
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fsqrts"))
+  "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_22_vx_vy" 22
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fsqrtd"))
+  "falkor_vxvy")
+
+;; FP Miscellaneous Instructions
+
+(define_insn_reservation "falkor_fpmsc_3_vtog" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_mrc"))
+  "falkor_vtog")
+
+(define_insn_reservation "falkor_fpmsc_3_gtov" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_mcr"))
+  "falkor_gtov")
+
+(define_insn_reservation "falkor_fpmsc_1_vxvy" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "fmov,fconsts,fconstd"))
+  "falkor_vxvy")
+
+;; No separate type for float-to-fixed conversions.  Same type as
+;; float-to-int conversions.  They schedule the same though, so no problem.
+
+(define_insn_reservation "falkor_fpmsc_6_gtov" 6
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "f_cvti2f"))
+  "falkor_gtov")
+
+;; Load Instructions
+
+(define_insn_reservation "falkor_ld_3_ld" 3
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "load1,load2"))
+  "falkor_ld")
+
+;; Miscellaneous Data-Processing Instructions
+
+(define_insn_reservation "falkor_misc_1_xyz" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "bfx,bfm,extend,rotate_imm,shift_imm"))
+  "falkor_xyz")
+
+(define_insn_reservation "falkor_misc_2_x" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "crc"))
+  "falkor_x")
+
+(define_insn_reservation "falkor_misc_2_xyz" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "clz,rbit,rev"))
+  "falkor_xyz")
+
+;; Divide and Multiply Instructions
+
+(define_insn_reservation "falkor_muldiv_4_x_mul" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "mul"))
+  "falkor_x")
+
+(define_insn_reservation "falkor_muldiv_4_x_mla" 4
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "mla,smlal,umlal"))
+  "falkor_x")
+
+(define_insn_reservation "falkor_muldiv_5_x_mul" 5
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "smull,umull"))
+  "falkor_x")
+
+(define_insn_reservation "falkor_md_11_x_z" 11
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "sdiv,udiv"))
+  "falkor_x+falkor_z")
+
+;; Move and Shift Instructions
+
+(define_insn_reservation "falkor_mvs_1_xyz" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "mov_imm,shift_reg"))
+  "falkor_xyz")
+
+(define_insn_reservation "falkor_mvs_1_xyzb" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "adr"))
+  "falkor_xyzb")
+
+;; Other Instructions
+
+;; Block is for instruction scheduling blockage insns in RTL.  There are no
+;; hardware instructions emitted for them, so don't use any resources.
+
+(define_insn_reservation "falkor_other_0_nothing" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "no_insn,trap,block"))
+  "nothing")
+
+(define_insn_reservation "falkor_other_2_z" 2
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "mrs"))
+  "falkor_z")
+
+;; Assume multiple instructions use all pipes.
+
+(define_insn_reservation "falkor_extra" 1
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "multiple"))
+  "falkor_x+falkor_y+falkor_z+falkor_b+falkor_vx+falkor_vy+falkor_ld+falkor_st+falkor_sd+falkor_vsd+falkor_gtov+falkor_vtog")
+
+;; Store Instructions
+
+;; No use of store_rel, store3, or store4 in aarch64.
+
+(define_insn_reservation "falkor_st_0_st_sd" 0
+  (and (eq_attr "tune" "falkor")
+       (eq_attr "type" "store1,store2"))
+  "falkor_st+falkor_sd")
+
+;; Multiply bypasses.
+
+;; 1 cycle latency (0 bubble) for an integer mul or mac feeding into a mac.
+
+(define_bypass 1
+  "falkor_ai_4_vxvy_mul,falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mul,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mul,falkor_muldiv_4_x_mla,falkor_muldiv_5_x_mul"
+  "falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mla")
+
+;; 3 cycle latency (2 bubbles) for an FP mul or mac feeding into a mac.
+
+(define_bypass 3
+  "falkor_afp_5_vxvy_mul,falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mul,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mul,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mul,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mul,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mul,falkor_fpdt_6_vxvy_mla"
+  "falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mla")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 067cef78533..cceb57525c7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -523,6 +523,17 @@
 			(SI   "SI") (HI   "HI")
 			(QI   "QI")])
 
+;; Define element mode for each vector mode (lower case).
+(define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
+			(V4HI "hi") (V8HI "hi")
+			(V2SI "si") (V4SI "si")
+			(DI "di")   (V2DI "di")
+			(V4HF "hf") (V8HF "hf")
+			(V2SF "sf") (V4SF "sf")
+			(V2DF "df") (DF "df")
+			(SI   "si") (HI   "hi")
+			(QI   "qi")])
+
 ;; 64-bit container modes the inner or scalar source mode.
 (define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
 			 (V4HI "V4HI") (V8HI "V4HI")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 95d28cfa33c..11243c4ce00 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -57,11 +57,6 @@
        (ior (match_operand 0 "register_operand")
 	    (match_test "op == const0_rtx"))))
 
-(define_predicate "aarch64_reg_or_fp_float"
-  (ior (match_operand 0 "register_operand")
-	(and (match_code "const_double")
-	     (match_test "aarch64_float_const_rtx_p (op)"))))
-
 (define_predicate "aarch64_reg_or_fp_zero"
   (ior (match_operand 0 "register_operand")
 	(and (match_code "const_double")
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 9f5dae29054..e13c5f9fc57 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -26,6 +26,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "memmodel.h"
 #include "gimple.h"
 #include "df.h"
@@ -9456,6 +9458,25 @@ And in the noreturn case:
 
   if (current_function_has_exception_handlers ())
     alpha_pad_function_end ();
+
+  /* The CALL_PAL that implements the trap insn updates the program counter
+     to point after the insn.  If the trap is the last insn in the function,
+     emit a NOP to guarantee that the PC remains inside function boundaries.
+     This workaround is needed to get reliable backtraces.  */
+  
+  rtx_insn *insn = prev_active_insn (get_last_insn ());
+
+  if (insn && NONJUMP_INSN_P (insn))
+    {
+      rtx pat = PATTERN (insn);
+      if (GET_CODE (pat) == PARALLEL)
+	{
+	  rtx vec = XVECEXP (pat, 0, 0);
+	  if (GET_CODE (vec) == TRAP_IF
+	      && XEXP (vec, 0) == const1_rtx)
+	    emit_insn_after (gen_unop (), insn);
+	}
+    }
 }
 
 static void
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index be5f1bd2003..057f8756fba 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 25677d19e20..fa3e2fa6c76 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -32,6 +32,7 @@
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 48992879a8e..7acbaf1bb40 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -444,6 +444,14 @@
                           (V2SF "SF") (V4SF "SF")
                           (DI "DI")   (V2DI "DI")])
 
+;; As above but in lower case.
+(define_mode_attr V_elem_l [(V8QI "qi") (V16QI "qi")
+			    (V4HI "hi") (V8HI "hi")
+			    (V4HF "hf") (V8HF "hf")
+			    (V2SI "si") (V4SI "si")
+			    (V2SF "sf") (V4SF "sf")
+			    (DI "di")   (V2DI "di")])
+
 ;; Element modes for vector extraction, padded up to register size.
 
 (define_mode_attr V_ext [(V8QI "SI") (V16QI "SI")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 235c46da1a1..45b3bd18052 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -412,7 +412,7 @@
   DONE;
 })
 
-(define_insn "vec_extract<mode>"
+(define_insn "vec_extract<mode><V_elem_l>"
   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
         (vec_select:<V_elem>
           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
@@ -434,7 +434,7 @@
   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 )
 
-(define_insn "vec_extract<mode>"
+(define_insn "vec_extract<mode><V_elem_l>"
   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
 	(vec_select:<V_elem>
           (match_operand:VQ2 1 "s_register_operand" "w,w")
@@ -460,7 +460,7 @@
   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 )
 
-(define_insn "vec_extractv2di"
+(define_insn "vec_extractv2didi"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
 	(vec_select:DI
           (match_operand:V2DI 1 "s_register_operand" "w,w")
@@ -479,7 +479,7 @@
   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
 )
 
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><V_elem_l>"
   [(match_operand:VDQ 0 "s_register_operand" "")
    (match_operand 1 "" "")]
   "TARGET_NEON"
@@ -1581,7 +1581,7 @@
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpadd_internal<mode>);
   /* The same result is actually computed into every element.  */
-  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
   DONE;
 })
 
@@ -1607,7 +1607,7 @@
   rtx vec = gen_reg_rtx (V2DImode);
 
   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
-  emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
 
   DONE;
 })
@@ -1631,7 +1631,7 @@
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpsmin<mode>);
   /* The result is computed into every element of the vector.  */
-  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
   DONE;
 })
 
@@ -1658,7 +1658,7 @@
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpsmax<mode>);
   /* The result is computed into every element of the vector.  */
-  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
   DONE;
 })
 
@@ -1685,7 +1685,7 @@
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpumin<mode>);
   /* The result is computed into every element of the vector.  */
-  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
   DONE;
 })
 
@@ -1711,7 +1711,7 @@
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpumax<mode>);
   /* The result is computed into every element of the vector.  */
-  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
+  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
   DONE;
 })
 
@@ -3272,7 +3272,8 @@
     }
 
   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
-    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
+    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
+						operands[2]));
   else
     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
 						       operands[1],
@@ -3301,7 +3302,8 @@
     }
 
   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
-    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
+    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
+						operands[2]));
   else
     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
 						       operands[1],
diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks
index 093d7fae88b..0a3840ba8fe 100644
--- a/gcc/config/arm/t-vxworks
+++ b/gcc/config/arm/t-vxworks
@@ -16,9 +16,7 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-MULTILIB_OPTIONS = \
-  mrtp fPIC \
-  t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe
-MULTILIB_MATCHES = fPIC=fpic
-# Don't build -fPIC multilibs for kernel or Thumb code.
-MULTILIB_EXCEPTIONS = fPIC* mrtp/fPIC/*t[45]t*
+MULTILIB_OPTIONS = mrtp fPIC
+
+# -fPIC alone is not supported, only together with -mrtp
+MULTILIB_EXCEPTIONS = fPIC
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index f20324f144d..59f6282a0ea 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -26,7 +26,15 @@ a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */
 
+/* TARGET_OS_CPP_BUILTINS, down to BPABI if defined.  */
 
+#if defined (TARGET_BPABI_CPP_BUILTINS)
+#define MAYBE_TARGET_BPABI_CPP_BUILTINS TARGET_BPABI_CPP_BUILTINS
+#else
+#define MAYBE_TARGET_BPABI_CPP_BUILTINS()
+#endif
+
+#undef TARGET_OS_CPP_BUILTINS
 #define TARGET_OS_CPP_BUILTINS()		\
   do {						\
     if (TARGET_BIG_END)				\
@@ -36,8 +44,29 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 						\
     if (arm_arch_xscale)			\
       builtin_define ("CPU=XSCALE");		\
+    else if (arm_arch7)				\
+      {						\
+	if (!arm_arch_notm)			\
+	  builtin_define ("CPU=ARMARCH7M");	\
+	else if (TARGET_THUMB)			\
+	  builtin_define ("CPU=ARMARCH7_T2");	\
+	else					\
+	  builtin_define ("CPU=ARMARCH7");	\
+      }						\
+    else if (arm_arch6)				\
+      {						\
+	if (TARGET_THUMB)			\
+	  builtin_define ("CPU=ARMARCH6_T");	\
+	else					\
+	  builtin_define ("CPU=ARMARCH6");	\
+      }						\
     else if (arm_arch5)				\
-      builtin_define ("CPU=ARMARCH5");		\
+      {						\
+	if (TARGET_THUMB)			\
+	  builtin_define ("CPU=ARMARCH5_T");	\
+	else					\
+	  builtin_define ("CPU=ARMARCH5");	\
+      }						\
     else if (arm_arch4)				\
       {						\
 	if (TARGET_THUMB)			\
@@ -46,6 +75,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 	  builtin_define ("CPU=ARMARCH4");	\
       }						\
     VXWORKS_OS_CPP_BUILTINS ();			\
+    MAYBE_TARGET_BPABI_CPP_BUILTINS ();		\
   } while (0)
 
 #undef SUBTARGET_OVERRIDE_OPTIONS
@@ -55,27 +85,32 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #undef SUBTARGET_CPP_SPEC
 #define SUBTARGET_CPP_SPEC "-D__ELF__" VXWORKS_ADDITIONAL_CPP_SPEC
 
+/* .text.hot and .text.unlikely sections are badly handled by the
+   VxWorks kernel mode loader for ARM style exceptions.  */
+
+#if ARM_UNWIND_INFO
+#define EXTRA_CC1_SPEC "%{!mrtp:-fno-reorder-functions}"
+#else
+#define EXTRA_CC1_SPEC
+#endif
+
 #undef  CC1_SPEC
-#define CC1_SPEC							\
-"%{tstrongarm:-mlittle-endian -mcpu=strongarm ;				\
-   t4:        -mlittle-endian -march=armv4 ;				\
-   t4be:      -mbig-endian -march=armv4 ;				\
-   t4t:       -mthumb -mthumb-interwork -mlittle-endian -march=armv4t ;	\
-   t4tbe:     -mthumb -mthumb-interwork -mbig-endian -march=armv4t ;	\
-   t5:        -mlittle-endian -march=armv5 ;				\
-   t5be:      -mbig-endian -march=armv5 ;				\
-   t5t:       -mthumb -mthumb-interwork -mlittle-endian -march=armv5 ;	\
-   t5tbe:     -mthumb -mthumb-interwork -mbig-endian -march=armv5 ;	\
-   txscale:   -mlittle-endian -mcpu=xscale ;				\
-   txscalebe: -mbig-endian -mcpu=xscale ;				\
-            : -march=armv4}"
-
-/* Pass -EB for big-endian targets.  */
-#define VXWORKS_ENDIAN_SPEC \
-  "%{mbig-endian|t4be|t4tbe|t5be|t5tbe|txscalebe:-EB}"
+#define CC1_SPEC "" EXTRA_CC1_SPEC
+
+/* Translate an explicit -mbig-endian as an explicit -EB to assembler
+   and linker, and pass abi options matching the target expectations
+   or command-line requests.  */
+#define VXWORKS_ENDIAN_SPEC "%{mbig-endian:-EB}"
+
+#if defined (TARGET_BPABI_CPP_BUILTINS)
+#define MAYBE_ASM_ABI_SPEC \
+  "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
+#else
+#define MAYBE_ASM_ABI_SPEC
+#endif
 
 #undef SUBTARGET_EXTRA_ASM_SPEC
-#define SUBTARGET_EXTRA_ASM_SPEC VXWORKS_ENDIAN_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC MAYBE_ASM_ABI_SPEC " " VXWORKS_ENDIAN_SPEC
 
 #undef LINK_SPEC
 #define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_ENDIAN_SPEC
@@ -89,6 +124,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #undef ENDFILE_SPEC
 #define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
 
+/* For exceptions, pre VX7 uses DWARF2 info, VX7 uses ARM unwinding.  */
+#undef  DWARF2_UNWIND_INFO
+#define DWARF2_UNWIND_INFO (!TARGET_VXWORKS7)
+
+#undef ARM_TARGET2_DWARF_FORMAT
+#define ARM_TARGET2_DWARF_FORMAT \
+  (TARGET_VXWORKS_RTP ? DW_EH_PE_pcrel : DW_EH_PE_absptr)
+
 /* There is no default multilib.  */
 #undef MULTILIB_DEFAULTS
 
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 1f333ccc1b2..e453bfb6814 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -26,6 +26,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "cgraph.h"
 #include "c-family/c-common.h"
 #include "cfghooks.h"
diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
index 9fe90fc37b4..f04fe874777 100644
--- a/gcc/config/bfin/bfin.c
+++ b/gcc/config/bfin/bfin.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "memmodel.h"
diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
index 4529fd44aae..a7083c12898 100644
--- a/gcc/config/c6x/c6x.c
+++ b/gcc/config/c6x/c6x.c
@@ -32,6 +32,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c
index 93edd232af2..ab6ef7adf1d 100644
--- a/gcc/config/cr16/cr16.c
+++ b/gcc/config/cr16/cr16.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 8c134a6bc8b..b57881ac04e 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -25,6 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "memmodel.h"
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 9a8cf31d400..949db25c650 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "insn-config.h"
 #include "emit-rtl.h"
 #include "cgraph.h"
diff --git a/gcc/config/epiphany/epiphany.c b/gcc/config/epiphany/epiphany.c
index b9ec0f40d12..4e27557d3ce 100644
--- a/gcc/config/epiphany/epiphany.c
+++ b/gcc/config/epiphany/epiphany.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "emit-rtl.h"
 #include "recog.h"
diff --git a/gcc/config/fr30/fr30.c b/gcc/config/fr30/fr30.c
index 42bec9f733c..d83b2f31daa 100644
--- a/gcc/config/fr30/fr30.c
+++ b/gcc/config/fr30/fr30.c
@@ -27,6 +27,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c
index c571d63f2c6..1cdbaa81c8d 100644
--- a/gcc/config/frv/frv.c
+++ b/gcc/config/frv/frv.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/ft32/ft32.c b/gcc/config/ft32/ft32.c
index db0365e92c1..78c5edc8f09 100644
--- a/gcc/config/ft32/ft32.c
+++ b/gcc/config/ft32/ft32.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h
index 2787a3d16be..de605b0c466 100644
--- a/gcc/config/gnu-user.h
+++ b/gcc/config/gnu-user.h
@@ -50,19 +50,28 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 #if defined HAVE_LD_PIE
 #define GNU_USER_TARGET_STARTFILE_SPEC \
-  "%{!shared: %{pg|p|profile:gcrt1.o%s;: \
-    %{" PIE_SPEC ":Scrt1.o%s} %{" NO_PIE_SPEC ":crt1.o%s}}} \
-   crti.o%s %{static:crtbeginT.o%s;: %{shared:crtbeginS.o%s} \
-	      %{" PIE_SPEC ":crtbeginS.o%s} \
-	      %{" NO_PIE_SPEC ":crtbegin.o%s}} \
+  "%{shared:; \
+     pg|p|profile:gcrt1.o%s; \
+     static:crt1.o%s; \
+     " PIE_SPEC ":Scrt1.o%s; \
+     :crt1.o%s} \
+   crti.o%s \
+   %{static:crtbeginT.o%s; \
+     shared|" PIE_SPEC ":crtbeginS.o%s; \
+     :crtbegin.o%s} \
    %{fvtable-verify=none:%s; \
      fvtable-verify=preinit:vtv_start_preinit.o%s; \
      fvtable-verify=std:vtv_start.o%s} \
    " CRTOFFLOADBEGIN
 #else
 #define GNU_USER_TARGET_STARTFILE_SPEC \
-  "%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \
-   crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \
+  "%{shared:; \
+     pg|p|profile:gcrt1.o%s; \
+     :crt1.o%s} \
+   crti.o%s \
+   %{static:crtbeginT.o%s; \
+     shared|pie:crtbeginS.o%s; \
+     :crtbegin.o%s} \
    %{fvtable-verify=none:%s; \
      fvtable-verify=preinit:vtv_start_preinit.o%s; \
      fvtable-verify=std:vtv_start.o%s} \
@@ -82,15 +91,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   "%{fvtable-verify=none:%s; \
      fvtable-verify=preinit:vtv_end_preinit.o%s; \
      fvtable-verify=std:vtv_end.o%s} \
-   %{shared:crtendS.o%s;: %{" PIE_SPEC ":crtendS.o%s} \
-   %{" NO_PIE_SPEC ":crtend.o%s}} crtn.o%s \
+   %{static:crtend.o%s; \
+     shared|" PIE_SPEC ":crtendS.o%s; \
+     :crtend.o%s} \
+   crtn.o%s \
    " CRTOFFLOADEND
 #else
 #define GNU_USER_TARGET_ENDFILE_SPEC \
   "%{fvtable-verify=none:%s; \
      fvtable-verify=preinit:vtv_end_preinit.o%s; \
      fvtable-verify=std:vtv_end.o%s} \
-   %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s \
+   %{static:crtend.o%s; \
+     shared|pie:crtendS.o%s; \
+     :crtend.o%s} \
+   crtn.o%s \
    " CRTOFFLOADEND
 #endif
 #undef  ENDFILE_SPEC
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index acdededeccc..0e0bb57768d 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/i386/cygming.opt b/gcc/config/i386/cygming.opt
index 2d7c7d2b674..abe59321040 100644
--- a/gcc/config/i386/cygming.opt
+++ b/gcc/config/i386/cygming.opt
@@ -50,6 +50,10 @@ muse-libstdc-wrappers
 Target Condition({defined (USE_CYGWIN_LIBSTDCXX_WRAPPERS)})
 Compile code that relies on Cygwin DLL wrappers to support C++ operator new/delete replacement.
 
+fset-stack-executable
+Common Report Var(flag_setstackexecutable) Init(1) Optimization
+For nested functions on stack executable permission is set.
+
 posix
 Driver
 
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
index a4683b4662e..7e34bb799c3 100644
--- a/gcc/config/i386/cygwin.h
+++ b/gcc/config/i386/cygwin.h
@@ -153,3 +153,7 @@ along with GCC; see the file COPYING3.  If not see
 #endif
 #define LIBGCC_SONAME "cyggcc_s" LIBGCC_EH_EXTN "-1.dll"
 
+/* Make stack executable to avoid DEP problems with trampolines.  */
+#define HAVE_ENABLE_EXECUTE_STACK
+#undef  CHECK_EXECUTE_STACK_ENABLED
+#define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bf8a0492592..2c15ba8505c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -201,6 +201,8 @@ extern void ix86_expand_truncdf_32 (rtx, rtx);
 
 extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
 
+extern rtx ix86_split_stack_guard (void);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif	/* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9a35c995f26..1d88e4f247a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -94,6 +94,7 @@ static rtx legitimize_pe_coff_extern_decl (rtx, bool);
 static rtx legitimize_pe_coff_symbol (rtx, bool);
 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
 static bool ix86_save_reg (unsigned int, bool, bool);
+static bool ix86_function_naked (const_tree);
 
 #ifndef CHECK_STACK_LIMIT
 #define CHECK_STACK_LIMIT (-1)
@@ -2491,9 +2492,7 @@ public:
     unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
 
     gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
-    return m_regs[last_reg].offset
-	   + (m->call_ms2sysv_pad_out ? 8 : 0)
-	   + STUB_INDEX_OFFSET;
+    return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
   }
 
   /* Returns the offset for the base pointer used by the stub.  */
@@ -6663,6 +6662,69 @@ ix86_option_override_internal (bool main_args_p,
     opts->x_ix86_stack_protector_guard
       = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
 
+#ifdef TARGET_THREAD_SSP_OFFSET
+  ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
+#endif
+
+  if (global_options_set.x_ix86_stack_protector_guard_offset_str)
+    {
+      char *endp;
+      const char *str = ix86_stack_protector_guard_offset_str;
+
+      errno = 0;
+      int64_t offset;
+
+#if defined(INT64_T_IS_LONG)
+      offset = strtol (str, &endp, 0);
+#else
+      offset = strtoll (str, &endp, 0);
+#endif
+
+      if (!*str || *endp || errno)
+	error ("%qs is not a valid number "
+	       "in -mstack-protector-guard-offset=", str);
+
+      if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
+		     HOST_WIDE_INT_C (0x7fffffff)))
+	error ("%qs is not a valid offset "
+	       "in -mstack-protector-guard-offset=", str);
+
+      ix86_stack_protector_guard_offset = offset;
+    }
+
+  ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
+
+  /* The kernel uses a different segment register for performance
+     reasons; a system call would not have to trash the userspace
+     segment register, which would be expensive.  */
+  if (ix86_cmodel == CM_KERNEL)
+    ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
+
+  if (global_options_set.x_ix86_stack_protector_guard_reg_str)
+    {
+      const char *str = ix86_stack_protector_guard_reg_str;
+      addr_space_t seg = ADDR_SPACE_GENERIC;
+
+      /* Discard optional register prefix.  */
+      if (str[0] == '%')
+	str++;
+
+      if (strlen (str) == 2 && str[1] == 's')
+	{
+	  if (str[0] == 'f')
+	    seg = ADDR_SPACE_SEG_FS;
+	  else if (str[0] == 'g')
+	    seg = ADDR_SPACE_SEG_GS;
+	}
+
+      if (seg == ADDR_SPACE_GENERIC)
+	error ("%qs is not a valid base register "
+	       "in -mstack-protector-guard-reg=",
+	       ix86_stack_protector_guard_reg_str);
+
+      ix86_stack_protector_guard_reg = seg;
+    }
+
   /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
   if (opts->x_ix86_tune_memcpy_strategy)
     {
@@ -7522,6 +7584,10 @@ ix86_set_func_type (tree fndecl)
       if (lookup_attribute ("interrupt",
 			    TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
 	{
+	  if (ix86_function_naked (fndecl))
+	    error_at (DECL_SOURCE_LOCATION (fndecl),
+		      "interrupt and naked attributes are not compatible");
+
 	  int nargs = 0;
 	  for (tree arg = DECL_ARGUMENTS (fndecl);
 	       arg;
@@ -7929,6 +7995,9 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
   rtx a, b;
   bool bind_global = decl && !targetm.binds_local_p (decl);
 
+  if (ix86_function_naked (current_function_decl))
+    return false;
+
   /* Sibling call isn't OK if there are no caller-saved registers
      since all registers must be preserved before return.  */
   if (cfun->machine->no_caller_saved_registers)
@@ -12857,13 +12926,12 @@ ix86_compute_frame_layout (void)
 	{
 	  unsigned count = xlogue_layout::count_stub_managed_regs ();
 	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
+	  m->call_ms2sysv_pad_in = 0;
 	}
     }
 
   frame->nregs = ix86_nsaved_regs ();
   frame->nsseregs = ix86_nsaved_sseregs ();
-  m->call_ms2sysv_pad_in = 0;
-  m->call_ms2sysv_pad_out = 0;
 
   /* 64-bit MS ABI seem to require stack alignment to be always 16,
      except for function prologues, leaf functions and when the defult
@@ -12965,16 +13033,7 @@ ix86_compute_frame_layout (void)
       gcc_assert (!frame->nsseregs);
 
       m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
-
-      /* Select an appropriate layout for incoming stack offset.  */
-      const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
-
-      if ((offset + xlogue.get_stack_space_used ()) & UNITS_PER_WORD)
-	m->call_ms2sysv_pad_out = 1;
-
-      offset += xlogue.get_stack_space_used ();
-      gcc_assert (!(offset & 0xf));
-      frame->outlined_save_offset = offset;
+      offset += xlogue_layout::get_instance ().get_stack_space_used ();
     }
 
   /* Align and set SSE register save area.  */
@@ -13002,6 +13061,7 @@ ix86_compute_frame_layout (void)
 
   /* Align start of frame for local function.  */
   if (stack_realign_fp
+      || m->call_ms2sysv
       || offset != frame->sse_reg_save_offset
       || size != 0
       || !crtl->is_leaf
@@ -14119,10 +14179,11 @@ output_probe_stack_range (rtx reg, rtx end)
   return "";
 }
 
-/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
-   to be generated in correct form.  */
+/* Finalize stack_realign_needed and frame_pointer_needed flags, which
+   will guide prologue/epilogue to be generated in correct form.  */
+
 static void
-ix86_finalize_stack_realign_flags (void)
+ix86_finalize_stack_frame_flags (void)
 {
   /* Check if stack realign is really needed after reload, and
      stores result in cfun */
@@ -14145,13 +14206,13 @@ ix86_finalize_stack_realign_flags (void)
     }
 
   /* If the only reason for frame_pointer_needed is that we conservatively
-     assumed stack realignment might be needed, but in the end nothing that
-     needed the stack alignment had been spilled, clear frame_pointer_needed
-     and say we don't need stack realignment.  */
-  if (stack_realign
+     assumed stack realignment might be needed or -fno-omit-frame-pointer
+     is used, but in the end nothing that needed the stack alignment had
+     been spilled and no stack access was made, clear frame_pointer_needed
+     and say we don't need stack realignment.  */
+  if ((stack_realign || !flag_omit_frame_pointer)
       && frame_pointer_needed
       && crtl->is_leaf
-      && flag_omit_frame_pointer
       && crtl->sp_is_unchanging
       && !ix86_current_function_calls_tls_descriptor
       && !crtl->accesses_prior_frames
@@ -14220,6 +14281,42 @@ ix86_finalize_stack_realign_flags (void)
       df_scan_blocks ();
       df_compute_regs_ever_live (true);
       df_analyze ();
+
+      if (flag_var_tracking)
+	{
+	  /* Since frame pointer is no longer available, replace it with
+	     stack pointer - UNITS_PER_WORD in debug insns.  */
+	  df_ref ref, next;
+	  for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
+	       ref; ref = next)
+	    {
+	      rtx_insn *insn = DF_REF_INSN (ref);
+	      /* Make sure the next ref is for a different instruction,
+		 so that we're not affected by the rescan.  */
+	      next = DF_REF_NEXT_REG (ref);
+	      while (next && DF_REF_INSN (next) == insn)
+		next = DF_REF_NEXT_REG (next);
+
+	      if (DEBUG_INSN_P (insn))
+		{
+		  bool changed = false;
+		  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
+		    {
+		      rtx *loc = DF_REF_LOC (ref);
+		      if (*loc == hard_frame_pointer_rtx)
+			{
+			  *loc = plus_constant (Pmode,
+						stack_pointer_rtx,
+						-UNITS_PER_WORD);
+			  changed = true;
+			}
+		    }
+		  if (changed)
+		    df_insn_rescan (insn);
+		}
+	    }
+	}
+
       recompute_frame_layout_p = true;
     }
 
@@ -14342,7 +14439,7 @@ ix86_expand_prologue (void)
   if (ix86_function_naked (current_function_decl))
     return;
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
 
   /* DRAP should not coexist with stack_realign_fp */
   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
@@ -15202,11 +15299,11 @@ ix86_expand_epilogue (int style)
   if (ix86_function_naked (current_function_decl))
     {
       /* The program should not reach this point.  */
-      emit_insn (gen_trap ());
+      emit_insn (gen_ud2 ());
       return;
     }
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = m->frame;
 
   m->fs.sp_realigned = stack_realign_fp;
@@ -15724,6 +15821,30 @@ static GTY(()) rtx split_stack_fn;
 
 static GTY(()) rtx split_stack_fn_large;
 
+/* Return location of the stack guard value in the TLS block.  */
+
+rtx
+ix86_split_stack_guard (void)
+{
+  int offset;
+  addr_space_t as = DEFAULT_TLS_SEG_REG;
+  rtx r;
+
+  gcc_assert (flag_split_stack);
+
+#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
+  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
+#else
+  gcc_unreachable ();
+#endif
+
+  r = GEN_INT (offset);
+  r = gen_const_mem (Pmode, r);
+  set_mem_addr_space (r, as);
+
+  return r;
+}
+
 /* Handle -fsplit-stack.  These are the first instructions in the
    function, even before the regular prologue.  */
 
@@ -15741,7 +15862,7 @@ ix86_expand_split_stack_prologue (void)
 
   gcc_assert (flag_split_stack && reload_completed);
 
-  ix86_finalize_stack_realign_flags ();
+  ix86_finalize_stack_frame_flags ();
   frame = cfun->machine->frame;
   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
 
@@ -15755,10 +15876,8 @@ ix86_expand_split_stack_prologue (void)
      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
      can compare directly.  Otherwise we need to do an addition.  */
 
-  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
-			  UNSPEC_STACK_CHECK);
-  limit = gen_rtx_CONST (Pmode, limit);
-  limit = gen_rtx_MEM (Pmode, limit);
+  limit = ix86_split_stack_guard ();
+
   if (allocate < SPLIT_STACK_AVAILABLE)
     current = stack_pointer_rtx;
   else
@@ -16831,10 +16950,6 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
 	  case UNSPEC_DTPOFF:
 	    break;
 
-	  case UNSPEC_STACK_CHECK:
-	    gcc_assert (flag_split_stack);
-	    break;
-
 	  default:
 	    /* Invalid address unspec.  */
 	    return false;
@@ -17924,17 +18039,10 @@ output_pic_addr_const (FILE *file, rtx x, int code)
 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
       break;
 
-     case UNSPEC:
-       if (XINT (x, 1) == UNSPEC_STACK_CHECK)
-	 {
-	   bool f = i386_asm_output_addr_const_extra (file, x);
-	   gcc_assert (f);
-	   break;
-	 }
-
-       gcc_assert (XVECLEN (x, 0) == 1);
-       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
-       switch (XINT (x, 1))
+    case UNSPEC:
+      gcc_assert (XVECLEN (x, 0) == 1);
+      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+      switch (XINT (x, 1))
 	{
 	case UNSPEC_GOT:
 	  fputs ("@GOT", file);
@@ -18627,7 +18735,6 @@ print_reg (rtx x, int code, FILE *file)
    + -- print a branch hint as 'cs' or 'ds' prefix
    ; -- print a semicolon (after prefixes due to bug in older gas).
    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
-   @ -- print a segment register of thread base pointer load
    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
    ! -- print MPX prefix for jxx/call/ret instructions if required.
  */
@@ -19171,19 +19278,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
 #endif
 	  return;
 
-	case '@':
-	  if (ASSEMBLER_DIALECT == ASM_ATT)
-	    putc ('%', file);
-
-	  /* The kernel uses a different segment register for performance
-	     reasons; a system call would not have to trash the userspace
-	     segment register, which would be expensive.  */
-	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
-	    fputs ("fs", file);
-	  else
-	    fputs ("gs", file);
-	  return;
-
 	case '~':
 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
 	  return;
@@ -19342,8 +19436,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
 static bool
 ix86_print_operand_punct_valid_p (unsigned char code)
 {
-  return (code == '@' || code == '*' || code == '+' || code == '&'
-	  || code == ';' || code == '~' || code == '^' || code == '!');
+  return (code == '*' || code == '+' || code == '&' || code == ';'
+	  || code == '~' || code == '^' || code == '!');
 }
 
 /* Print a memory operand whose address is ADDR.  */
@@ -19442,7 +19536,7 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
       /* Displacement only requires special attention.  */
       if (CONST_INT_P (disp))
 	{
-	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
+	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
 	    fputs ("ds:", file);
 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
 	}
@@ -19644,22 +19738,6 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
       break;
 #endif
 
-    case UNSPEC_STACK_CHECK:
-      {
-	int offset;
-
-	gcc_assert (flag_split_stack);
-
-#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
-	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
-#else
-	gcc_unreachable ();
-#endif
-
-	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
-      }
-      break;
-
     default:
       return false;
     }
@@ -31676,6 +31754,13 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
 }
 
 static bool
+ix86_allocate_stack_slots_for_args (void)
+{
+  /* Naked functions should not allocate stack slots for arguments.  */
+  return !ix86_function_naked (current_function_decl);
+}
+
+static bool
 ix86_warn_func_return (tree decl)
 {
   /* Naked functions are implemented entirely in assembly, including the
@@ -33413,13 +33498,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
 	      break;
 	    case PROCESSOR_NEHALEM:
 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
-		arg_str = "westmere";
+		{
+		  arg_str = "westmere";
+		  priority = P_AES;
+		}
 	      else
-		/* We translate "arch=corei7" and "arch=nehalem" to
-		   "corei7" so that it will be mapped to M_INTEL_COREI7
-		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
-		arg_str = "corei7";
-	      priority = P_PROC_SSE4_2;
+		{
+		  /* We translate "arch=corei7" and "arch=nehalem" to
+		     "corei7" so that it will be mapped to M_INTEL_COREI7
+		     as cpu type to cover all M_INTEL_COREI7_XXXs.  */
+		  arg_str = "corei7";
+		  priority = P_PROC_SSE4_2;
+		}
 	      break;
 	    case PROCESSOR_SANDYBRIDGE:
 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
@@ -33859,30 +33949,30 @@ ix86_get_function_versions_dispatcher (void *decl)
 }
 
 /* Make the resolver function decl to dispatch the versions of
-   a multi-versioned function,  DEFAULT_DECL.  Create an
+   a multi-versioned function,  DEFAULT_DECL.  IFUNC_ALIAS_DECL is
+   the ifunc alias that will point to the created resolver.  Create an
    empty basic block in the resolver and store the pointer in
    EMPTY_BB.  Return the decl of the resolver function.  */
 
 static tree
 make_resolver_func (const tree default_decl,
-		    const tree dispatch_decl,
+		    const tree ifunc_alias_decl,
 		    basic_block *empty_bb)
 {
   char *resolver_name;
   tree decl, type, decl_name, t;
-  bool is_uniq = false;
 
   /* IFUNC's have to be globally visible.  So, if the default_decl is
      not, then the name of the IFUNC should be made unique.  */
   if (TREE_PUBLIC (default_decl) == 0)
-    is_uniq = true;
+    {
+      char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
+      symtab->change_decl_assembler_name (ifunc_alias_decl,
+					  get_identifier (ifunc_name));
+      XDELETEVEC (ifunc_name);
+    }
 
-  /* Append the filename to the resolver function if the versions are
-     not externally visible.  This is because the resolver function has
-     to be externally visible for the loader to find it.  So, appending
-     the filename will prevent conflicts with a resolver function from
-     another module which is based on the same version name.  */
-  resolver_name = make_unique_name (default_decl, "resolver", is_uniq);
+  resolver_name = make_unique_name (default_decl, "resolver", false);
 
   /* The resolver function should return a (void *). */
   type = build_function_type_list (ptr_type_node, NULL_TREE);
@@ -33895,13 +33985,12 @@ make_resolver_func (const tree default_decl,
   TREE_USED (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
   DECL_IGNORED_P (decl) = 0;
-  /* IFUNC resolvers have to be externally visible.  */
-  TREE_PUBLIC (decl) = 1;
+  TREE_PUBLIC (decl) = 0;
   DECL_UNINLINABLE (decl) = 1;
 
   /* Resolver is not external, body is generated.  */
   DECL_EXTERNAL (decl) = 0;
-  DECL_EXTERNAL (dispatch_decl) = 0;
+  DECL_EXTERNAL (ifunc_alias_decl) = 0;
 
   DECL_CONTEXT (decl) = NULL_TREE;
   DECL_INITIAL (decl) = make_node (BLOCK);
@@ -33932,14 +34021,14 @@ make_resolver_func (const tree default_decl,
 
   pop_cfun ();
 
-  gcc_assert (dispatch_decl != NULL);
-  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
-  DECL_ATTRIBUTES (dispatch_decl) 
-    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
+  gcc_assert (ifunc_alias_decl != NULL);
+  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
+  DECL_ATTRIBUTES (ifunc_alias_decl)
+    = make_attribute ("ifunc", resolver_name,
+		      DECL_ATTRIBUTES (ifunc_alias_decl));
 
   /* Create the alias for dispatch to resolver here.  */
-  /*cgraph_create_function_alias (dispatch_decl, decl);*/
-  cgraph_node::create_same_body_alias (dispatch_decl, decl);
+  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
   XDELETEVEC (resolver_name);
   return decl;
 }
@@ -44311,6 +44400,34 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
   int i;
   rtx x;
 
+  /* Handle first initialization from vector elts.  */
+  if (n_elts != XVECLEN (vals, 0))
+    {
+      rtx subtarget = target;
+      x = XVECEXP (vals, 0, 0);
+      gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
+      if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
+	{
+	  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
+	  if (inner_mode == QImode || inner_mode == HImode)
+	    {
+	      mode = mode_for_vector (SImode,
+				      n_elts * GET_MODE_SIZE (inner_mode) / 4);
+	      inner_mode
+		= mode_for_vector (SImode,
+				   n_elts * GET_MODE_SIZE (inner_mode) / 8);
+	      ops[0] = gen_lowpart (inner_mode, ops[0]);
+	      ops[1] = gen_lowpart (inner_mode, ops[1]);
+	      subtarget = gen_reg_rtx (mode);
+	    }
+	  ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
+	  if (subtarget != target)
+	    emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
+	  return;
+	}
+      gcc_unreachable ();
+    }
+
   for (i = 0; i < n_elts; ++i)
     {
       x = XVECEXP (vals, 0, i);
@@ -45772,17 +45889,60 @@ ix86_mangle_type (const_tree type)
     }
 }
 
-#ifdef TARGET_THREAD_SSP_OFFSET
-/* If using TLS guards, don't waste time creating and expanding
-   __stack_chk_guard decl and MEM as we are going to ignore it.  */
+static GTY(()) tree ix86_tls_stack_chk_guard_decl;
+
 static tree
 ix86_stack_protect_guard (void)
 {
   if (TARGET_SSP_TLS_GUARD)
-    return NULL_TREE;
+    {
+      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
+      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
+      tree type = build_qualified_type (type_node, qual);
+      tree t;
+
+      if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
+	{
+	  t = ix86_tls_stack_chk_guard_decl;
+
+	  if (t == NULL)
+	    {
+	      rtx x;
+
+	      t = build_decl
+		(UNKNOWN_LOCATION, VAR_DECL,
+		 get_identifier (ix86_stack_protector_guard_symbol_str),
+		 type);
+	      TREE_STATIC (t) = 1;
+	      TREE_PUBLIC (t) = 1;
+	      DECL_EXTERNAL (t) = 1;
+	      TREE_USED (t) = 1;
+	      TREE_THIS_VOLATILE (t) = 1;
+	      DECL_ARTIFICIAL (t) = 1;
+	      DECL_IGNORED_P (t) = 1;
+
+	      /* Do not share RTL as the declaration is visible outside of
+		 the current function.  */
+	      x = DECL_RTL (t);
+	      RTX_FLAG (x, used) = 1;
+
+	      ix86_tls_stack_chk_guard_decl = t;
+	    }
+	}
+      else
+	{
+	  tree asptrtype = build_pointer_type (type);
+
+	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
+	  t = build2 (MEM_REF, asptrtype, t,
+		      build_int_cst (asptrtype, 0));
+	}
+
+      return t;
+    }
+
   return default_stack_protect_guard ();
 }
-#endif
 
 /* For 32-bit code we can save PIC register setup by using
    __stack_chk_fail_local hidden function instead of calling
@@ -52727,6 +52887,8 @@ ix86_run_selftests (void)
 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
 #undef TARGET_MUST_PASS_IN_STACK
 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
 #undef TARGET_FUNCTION_ARG_ADVANCE
 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
 #undef TARGET_FUNCTION_ARG
@@ -52788,10 +52950,8 @@ ix86_run_selftests (void)
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE ix86_mangle_type
 
-#ifdef TARGET_THREAD_SSP_OFFSET
 #undef TARGET_STACK_PROTECT_GUARD
 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
-#endif
 
 #if !TARGET_MACHO
 #undef TARGET_STACK_PROTECT_FAIL
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index bdea37b7313..e8ae3e3b3cc 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2484,8 +2484,7 @@ enum avx_u128_state
 			<- end of stub-saved/restored regs
      [padding1]
    ]
-					<- outlined_save_offset
-					<- sse_regs_save_offset
+					<- sse_reg_save_offset
    [padding2]
 		       |		<- FRAME_POINTER
    [va_arg registers]  |
@@ -2511,7 +2510,6 @@ struct GTY(()) ix86_frame
   HOST_WIDE_INT reg_save_offset;
   HOST_WIDE_INT stack_realign_allocate_offset;
   HOST_WIDE_INT stack_realign_offset;
-  HOST_WIDE_INT outlined_save_offset;
   HOST_WIDE_INT sse_reg_save_offset;
 
   /* When save_regs_using_mov is set, emit prologue using
@@ -2647,17 +2645,13 @@ struct GTY(()) machine_function {
   BOOL_BITFIELD arg_reg_available : 1;
 
   /* If true, we're out-of-lining reg save/restore for regs clobbered
-     by ms_abi functions calling a sysv function.  */
+     by 64-bit ms_abi functions calling a sysv_abi function.  */
   BOOL_BITFIELD call_ms2sysv : 1;
 
   /* If true, the incoming 16-byte aligned stack has an offset (of 8) and
-     needs padding.  */
+     needs padding prior to out-of-line stub save/restore area.  */
   BOOL_BITFIELD call_ms2sysv_pad_in : 1;
 
-  /* If true, the size of the stub save area plus inline int reg saves will
-     result in an 8 byte offset, so needs padding.  */
-  BOOL_BITFIELD call_ms2sysv_pad_out : 1;
-
   /* This is the number of extra registers saved by stub (valid range is
      0-6). Each additional register is only saved/restored by the stubs
      if all successive ones are. (Will always be zero when using a hard
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5eff4e46fff..8cf6d21c82a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -61,7 +61,6 @@
 ;; + -- print a branch hint as 'cs' or 'ds' prefix
 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
-;; @ -- print a segment register of thread base pointer load
 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 ;; ! -- print MPX prefix for jxx/call/ret instructions if required.
 
@@ -87,7 +86,6 @@
   UNSPEC_SET_RIP
   UNSPEC_SET_GOT_OFFSET
   UNSPEC_MEMORY_BLOCKAGE
-  UNSPEC_STACK_CHECK
   UNSPEC_PROBE_STACK
 
   ;; TLS support
@@ -166,8 +164,6 @@
   ;; SSP patterns
   UNSPEC_SP_SET
   UNSPEC_SP_TEST
-  UNSPEC_SP_TLS_SET
-  UNSPEC_SP_TLS_TEST
 
   ;; For ROUND support
   UNSPEC_ROUND
@@ -201,6 +197,7 @@
 ])
 
 (define_c_enum "unspecv" [
+  UNSPECV_UD2
   UNSPECV_BLOCKAGE
   UNSPECV_STACK_PROBE
   UNSPECV_PROBE_STACK_RANGE
@@ -12632,20 +12629,17 @@
   [(set (pc) (if_then_else
 	      (ltu (minus (reg SP_REG)
 			  (match_operand 0 "register_operand"))
-		   (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+		   (match_dup 2))
 	      (label_ref (match_operand 1))
 	      (pc)))]
   ""
 {
-  rtx reg, size, limit;
+  rtx reg = gen_reg_rtx (Pmode);
 
-  reg = gen_reg_rtx (Pmode);
-  size = force_reg (Pmode, operands[0]);
-  emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size));
-  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
-			  UNSPEC_STACK_CHECK);
-  limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit));
-  ix86_expand_branch (GEU, reg, limit, operands[1]);
+  emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));
+
+  operands[2] = ix86_split_stack_guard ();
+  ix86_expand_branch (GEU, reg, operands[2], operands[1]);
 
   DONE;
 })
@@ -13779,82 +13773,78 @@
       (clobber (match_dup 5))
       (clobber (reg:CC FLAGS_REG))])])
 
-;; Segment register for the thread base ptr load
-(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
-
 ;; Load and add the thread base pointer from %<tp_seg>:0.
-(define_insn "*load_tp_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(const_int 0)] UNSPEC_TP))]
-  "TARGET_X32"
-  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+(define_insn_and_split "*load_tp_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=r")
+	(unspec:PTR [(const_int 0)] UNSPEC_TP))]
+  ""
+  "#"
+  ""
+  [(set (match_dup 0)
+	(match_dup 1))]
+{
+  addr_space_t as = DEFAULT_TLS_SEG_REG;
+
+  operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
+  set_mem_addr_space (operands[1], as);
+})
 
-(define_insn "*load_tp_x32_zext"
+(define_insn_and_split "*load_tp_x32_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
+	(zero_extend:DI
+	  (unspec:SI [(const_int 0)] UNSPEC_TP)))]
   "TARGET_X32"
-  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+  "#"
+  ""
+  [(set (match_dup 0)
+	(zero_extend:DI (match_dup 1)))]
+{
+  addr_space_t as = DEFAULT_TLS_SEG_REG;
 
-(define_insn "*load_tp_<mode>"
-  [(set (match_operand:P 0 "register_operand" "=r")
-	(unspec:P [(const_int 0)] UNSPEC_TP))]
-  "!TARGET_X32"
-  "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
-  [(set_attr "type" "imov")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+  operands[1] = gen_const_mem (SImode, const0_rtx);
+  set_mem_addr_space (operands[1], as);
+})
 
-(define_insn "*add_tp_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
-		 (match_operand:SI 1 "register_operand" "0")))
+(define_insn_and_split "*add_tp_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=r")
+	(plus:PTR
+	  (unspec:PTR [(const_int 0)] UNSPEC_TP)
+	  (match_operand:PTR 1 "register_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_X32"
-  "add{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
-  [(set_attr "type" "alu")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+  ""
+  "#"
+  ""
+  [(parallel
+     [(set (match_dup 0)
+	   (plus:PTR (match_dup 1) (match_dup 2)))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  addr_space_t as = DEFAULT_TLS_SEG_REG;
+
+  operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
+  set_mem_addr_space (operands[2], as);
+})
 
-(define_insn "*add_tp_x32_zext"
+(define_insn_and_split "*add_tp_x32_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
 	  (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
 		   (match_operand:SI 1 "register_operand" "0"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_X32"
-  "add{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
-  [(set_attr "type" "alu")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+  "#"
+  ""
+  [(parallel
+     [(set (match_dup 0)
+     	   (zero_extend:DI
+	     (plus:SI (match_dup 1) (match_dup 2))))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  addr_space_t as = DEFAULT_TLS_SEG_REG;
 
-(define_insn "*add_tp_<mode>"
-  [(set (match_operand:P 0 "register_operand" "=r")
-	(plus:P (unspec:P [(const_int 0)] UNSPEC_TP)
-		(match_operand:P 1 "register_operand" "0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_X32"
-  "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
-  [(set_attr "type" "alu")
-   (set_attr "modrm" "0")
-   (set_attr "length" "7")
-   (set_attr "memory" "load")
-   (set_attr "imm_disp" "false")])
+  operands[2] = gen_const_mem (SImode, const0_rtx);
+  set_mem_addr_space (operands[2], as);
+})
 
 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
 ;; %rax as destination of the initial executable code sequence.
@@ -18606,6 +18596,18 @@
 }
   [(set_attr "length" "2")])
 
+(define_insn "ud2"
+  [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
+  ""
+{
+#ifdef HAVE_AS_IX86_UD2
+  return "ud2";
+#else
+  return ASM_SHORT "0x0b0f";
+#endif
+}
+  [(set_attr "length" "2")])
+
 (define_expand "prefetch"
   [(prefetch (match_operand 0 "address_operand")
 	     (match_operand:SI 1 "const_int_operand")
@@ -18705,16 +18707,9 @@
 {
   rtx (*insn)(rtx, rtx);
 
-#ifdef TARGET_THREAD_SSP_OFFSET
-  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
-  insn = (TARGET_LP64
-	  ? gen_stack_tls_protect_set_di
-	  : gen_stack_tls_protect_set_si);
-#else
   insn = (TARGET_LP64
 	  ? gen_stack_protect_set_di
 	  : gen_stack_protect_set_si);
-#endif
 
   emit_insn (insn (operands[0], operands[1]));
   DONE;
@@ -18730,16 +18725,6 @@
   "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
   [(set_attr "type" "multi")])
 
-(define_insn "stack_tls_protect_set_<mode>"
-  [(set (match_operand:PTR 0 "memory_operand" "=m")
-	(unspec:PTR [(match_operand:PTR 1 "const_int_operand" "i")]
-		    UNSPEC_SP_TLS_SET))
-   (set (match_scratch:PTR 2 "=&r") (const_int 0))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
-  [(set_attr "type" "multi")])
-
 (define_expand "stack_protect_test"
   [(match_operand 0 "memory_operand")
    (match_operand 1 "memory_operand")
@@ -18750,16 +18735,9 @@
 
   rtx (*insn)(rtx, rtx, rtx);
 
-#ifdef TARGET_THREAD_SSP_OFFSET
-  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
-  insn = (TARGET_LP64
-	  ? gen_stack_tls_protect_test_di
-	  : gen_stack_tls_protect_test_si);
-#else
   insn = (TARGET_LP64
 	  ? gen_stack_protect_test_di
 	  : gen_stack_protect_test_si);
-#endif
 
   emit_insn (insn (flags, operands[0], operands[1]));
 
@@ -18778,16 +18756,6 @@
   "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}"
   [(set_attr "type" "multi")])
 
-(define_insn "stack_tls_protect_test_<mode>"
-  [(set (match_operand:CCZ 0 "flags_reg_operand")
-	(unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
-		     (match_operand:PTR 2 "const_int_operand" "i")]
-		    UNSPEC_SP_TLS_TEST))
-   (clobber (match_scratch:PTR 3 "=r"))]
-  ""
-  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}"
-  [(set_attr "type" "multi")])
-
 (define_insn "sse4_2_crc32<mode>"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index adc75f36602..cd564315f04 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -924,6 +924,24 @@ Enum(stack_protector_guard) String(tls) Value(SSP_TLS)
 EnumValue
 Enum(stack_protector_guard) String(global) Value(SSP_GLOBAL)
 
+mstack-protector-guard-reg=
+Target RejectNegative Joined Var(ix86_stack_protector_guard_reg_str)
+Use the given base register for addressing the stack-protector guard.
+
+TargetVariable
+addr_space_t ix86_stack_protector_guard_reg = ADDR_SPACE_GENERIC
+
+mstack-protector-guard-offset=
+Target RejectNegative Joined Integer Var(ix86_stack_protector_guard_offset_str)
+Use the given offset for addressing the stack-protector guard.
+
+TargetVariable
+HOST_WIDE_INT ix86_stack_protector_guard_offset = 0
+
+mstack-protector-guard-symbol=
+Target RejectNegative Joined Integer Var(ix86_stack_protector_guard_symbol_str)
+Use the given symbol for addressing the stack-protector guard.
+
 mmitigate-rop
 Target Var(flag_mitigate_rop) Init(0)
 Attempt to avoid generating instruction sequences containing ret bytes.
diff --git a/gcc/config/i386/mingw.opt b/gcc/config/i386/mingw.opt
index 210c14f549e..97a9baa6d7a 100644
--- a/gcc/config/i386/mingw.opt
+++ b/gcc/config/i386/mingw.opt
@@ -28,8 +28,4 @@ Wpedantic-ms-format
 C ObjC C++ ObjC++ Var(warn_pedantic_ms_format) Init(1) Warning
 Warn about none ISO msvcrt scanf/printf width extensions.
 
-fset-stack-executable
-Common Report Var(flag_setstackexecutable) Init(1) Optimization
-For nested functions on stack executable permission is set.
-
 ; Need to retain blank line above.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7a1789f0b47..b3f3633e964 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -641,7 +641,7 @@
   [(set (match_dup 0) (match_dup 1))]
   "operands[1] = adjust_address (operands[1], SFmode, 4);")
 
-(define_expand "vec_extractv2sf"
+(define_expand "vec_extractv2sfsf"
   [(match_operand:SF 0 "register_operand")
    (match_operand:V2SF 1 "register_operand")
    (match_operand 2 "const_int_operand")]
@@ -652,7 +652,7 @@
   DONE;
 })
 
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "register_operand")
    (match_operand 1)]
   "TARGET_SSE"
@@ -1344,7 +1344,7 @@
   operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
 })
 
-(define_expand "vec_extractv2si"
+(define_expand "vec_extractv2sisi"
   [(match_operand:SI 0 "register_operand")
    (match_operand:V2SI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
@@ -1355,7 +1355,7 @@
   DONE;
 })
 
-(define_expand "vec_initv2si"
+(define_expand "vec_initv2sisi"
   [(match_operand:V2SI 0 "register_operand")
    (match_operand 1)]
   "TARGET_SSE"
@@ -1375,7 +1375,7 @@
   DONE;
 })
 
-(define_expand "vec_extractv4hi"
+(define_expand "vec_extractv4hihi"
   [(match_operand:HI 0 "register_operand")
    (match_operand:V4HI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
@@ -1386,7 +1386,7 @@
   DONE;
 })
 
-(define_expand "vec_initv4hi"
+(define_expand "vec_initv4hihi"
   [(match_operand:V4HI 0 "register_operand")
    (match_operand 1)]
   "TARGET_SSE"
@@ -1406,7 +1406,7 @@
   DONE;
 })
 
-(define_expand "vec_extractv8qi"
+(define_expand "vec_extractv8qiqi"
   [(match_operand:QI 0 "register_operand")
    (match_operand:V8QI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
@@ -1417,7 +1417,7 @@
   DONE;
 })
 
-(define_expand "vec_initv8qi"
+(define_expand "vec_initv8qiqi"
   [(match_operand:V8QI 0 "register_operand")
    (match_operand 1)]
   "TARGET_SSE"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 56b7f436d5d..253ff5d5a7d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -658,13 +658,21 @@
 
 ;; Mapping of vector modes to a vector mode of half size
 (define_mode_attr ssehalfvecmode
-  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
+  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
    (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
    (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
    (V16SF "V8SF") (V8DF "V4DF")
    (V8SF  "V4SF") (V4DF "V2DF")
    (V4SF  "V2SF")])
 
+(define_mode_attr ssehalfvecmodelower
+  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
+   (V32QI "v16qi") (V16HI  "v8hi") (V8SI  "v4si") (V4DI "v2di")
+   (V16QI  "v8qi") (V8HI   "v4hi") (V4SI  "v2si")
+   (V16SF "v8sf") (V8DF "v4df")
+   (V8SF  "v4sf") (V4DF "v2df")
+   (V4SF  "v2sf")])
+
 ;; Mapping of vector modes ti packed single mode of the same size
 (define_mode_attr ssePSmode
   [(V16SI "V16SF") (V8DF "V16SF")
@@ -690,6 +698,16 @@
    (V8DF "DF")  (V4DF "DF")  (V2DF "DF")
    (V4TI "TI")  (V2TI "TI")])
 
+;; Mapping of vector modes back to the scalar modes
+(define_mode_attr ssescalarmodelower
+  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
+   (V32HI "hi") (V16HI "hi") (V8HI "hi")
+   (V16SI "si") (V8SI "si")  (V4SI "si")
+   (V8DI "di")  (V4DI "di")  (V2DI "di")
+   (V16SF "sf") (V8SF "sf")  (V4SF "sf")
+   (V8DF "df")  (V4DF "df")  (V2DF "df")
+   (V4TI "ti")  (V2TI "ti")])
+
 ;; Mapping of vector modes to the 128bit modes
 (define_mode_attr ssexmmmode
   [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
@@ -2356,7 +2374,7 @@
 {
   rtx tmp = gen_reg_rtx (V8DFmode);
   ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
-  emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
   DONE;
 })
 
@@ -2371,7 +2389,7 @@
   emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
   emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
   emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
-  emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
+  emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
   DONE;
 })
 
@@ -2382,7 +2400,7 @@
 {
   rtx tmp = gen_reg_rtx (V2DFmode);
   emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
-  emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
   DONE;
 })
 
@@ -2393,7 +2411,7 @@
 {
   rtx tmp = gen_reg_rtx (V16SFmode);
   ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
-  emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
   DONE;
 })
 
@@ -2409,7 +2427,7 @@
   emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
   emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
   emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
-  emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
+  emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
   DONE;
 })
 
@@ -2427,7 +2445,7 @@
     }
   else
     ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
-  emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
+  emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
   DONE;
 })
 
@@ -2449,7 +2467,8 @@
 {
   rtx tmp = gen_reg_rtx (<MODE>mode);
   ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
-  emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+							const0_rtx));
   DONE;
 })
 
@@ -2461,7 +2480,8 @@
 {
   rtx tmp = gen_reg_rtx (<MODE>mode);
   ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
-  emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+							const0_rtx));
   DONE;
 })
 
@@ -2473,7 +2493,8 @@
 {
   rtx tmp = gen_reg_rtx (<MODE>mode);
   ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
-  emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+							const0_rtx));
   DONE;
 })
 
@@ -2485,7 +2506,7 @@
 {
   rtx tmp = gen_reg_rtx (V8HImode);
   ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
-  emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
+  emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
   DONE;
 })
 
@@ -7881,7 +7902,7 @@
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><ssescalarmodelower>"
   [(match_operand:<ssescalarmode> 0 "register_operand")
    (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
    (match_operand 2 "const_int_operand")]
@@ -7892,6 +7913,19 @@
   DONE;
 })
 
+(define_expand "vec_extract<mode><ssehalfvecmodelower>"
+  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+   (match_operand:V_512 1 "register_operand")
+   (match_operand 2 "const_0_to_1_operand")]
+  "TARGET_AVX512F"
+{
+  if (INTVAL (operands[2]))
+    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+  else
+    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel double-precision floating point element swizzling
@@ -16693,7 +16727,7 @@
       for (i = 0; i < <ssescalarnum>; i++)
 	RTVEC_ELT (vs, i) = op2;
 
-      emit_insn (gen_vec_init<mode> (reg, par));
+      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
       emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
       DONE;
     }
@@ -16725,7 +16759,7 @@
       for (i = 0; i < <ssescalarnum>; i++)
 	RTVEC_ELT (vs, i) = op2;
 
-      emit_insn (gen_vec_init<mode> (reg, par));
+      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
       emit_insn (gen_neg<mode>2 (neg, reg));
       emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
       DONE;
@@ -17019,7 +17053,7 @@
         XVECEXP (par, 0, i) = operands[2];
 
       tmp = gen_reg_rtx (V16QImode);
-      emit_insn (gen_vec_initv16qi (tmp, par));
+      emit_insn (gen_vec_initv16qiqi (tmp, par));
 
       if (negate)
 	emit_insn (gen_negv16qi2 (tmp, tmp));
@@ -17055,7 +17089,7 @@
       for (i = 0; i < 2; i++)
 	XVECEXP (par, 0, i) = operands[2];
 
-      emit_insn (gen_vec_initv2di (reg, par));
+      emit_insn (gen_vec_initv2didi (reg, par));
 
       if (negate)
 	emit_insn (gen_negv2di2 (reg, reg));
@@ -18775,7 +18809,7 @@
 				  <ssehalfvecmode>mode);
 })
 
-;; Modes handled by vec_init patterns.
+;; Modes handled by vec_init expanders.
 (define_mode_iterator VEC_INIT_MODE
   [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
@@ -18785,7 +18819,18 @@
    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
 
-(define_expand "vec_init<mode>"
+;; Likewise, but for initialization from half sized vectors.
+;; Thus, these are all VEC_INIT_MODE modes except V2??.
+(define_mode_iterator VEC_INIT_HALF_MODE
+  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
+   (V4TI "TARGET_AVX512F")])
+
+(define_expand "vec_init<mode><ssescalarmodelower>"
   [(match_operand:VEC_INIT_MODE 0 "register_operand")
    (match_operand 1)]
   "TARGET_SSE"
@@ -18794,6 +18839,15 @@
   DONE;
 })
 
+(define_expand "vec_init<mode><ssehalfvecmodelower>"
+  [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
+   (match_operand 1)]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_init (false, operands[0], operands[1]);
+  DONE;
+})
+
 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
   [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
 	(ashiftrt:VI48_AVX512F_AVX512VL
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index 8272c7fddc1..405f74a7597 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "emit-rtl.h"
 #include "cgraph.h"
 #include "lto-streamer.h"
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index c8e4c74bbdb..79c323f67ee 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index a565df53bfa..8e76864a77f 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -1015,7 +1015,7 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_expand "vec_initv2si"
+(define_expand "vec_initv2sisi"
   [(match_operand:V2SI 0 "gr_register_operand" "")
    (match_operand 1 "" "")]
   ""
@@ -1299,7 +1299,7 @@
   "fselect %0 = %F2, %F3, %1"
   [(set_attr "itanium_class" "fmisc")])
 
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "fr_register_operand" "")
    (match_operand 1 "" "")]
   ""
@@ -1483,7 +1483,7 @@
   operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
 })
 
-(define_expand "vec_extractv2sf"
+(define_expand "vec_extractv2sfsf"
   [(set (match_operand:SF 0 "register_operand" "")
 	(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
 		    (match_operand:DI 2 "const_int_operand" "")]
diff --git a/gcc/config/iq2000/iq2000.c b/gcc/config/iq2000/iq2000.c
index 99abd76c5a6..5a92164ef05 100644
--- a/gcc/config/iq2000/iq2000.c
+++ b/gcc/config/iq2000/iq2000.c
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/lm32/lm32.c b/gcc/config/lm32/lm32.c
index 2231412e566..214cc0ac5fd 100644
--- a/gcc/config/lm32/lm32.c
+++ b/gcc/config/lm32/lm32.c
@@ -26,6 +26,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c
index b23f5aaf21a..95e97abf533 100644
--- a/gcc/config/m32c/m32c.c
+++ b/gcc/config/m32c/m32c.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
index 4efb4b9c287..d7893d7550a 100644
--- a/gcc/config/m32r/m32r.c
+++ b/gcc/config/m32r/m32r.c
@@ -28,6 +28,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "insn-config.h"
 #include "emit-rtl.h"
 #include "recog.h"
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
index c14ce86d810..89726655122 100644
--- a/gcc/config/m68k/m68k.c
+++ b/gcc/config/m68k/m68k.c
@@ -23,6 +23,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "backend.h"
 #include "cfghooks.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "rtl.h"
 #include "df.h"
 #include "alias.h"
diff --git a/gcc/config/mcore/mcore.c b/gcc/config/mcore/mcore.c
index c4b7c4cf94f..e67376fb6aa 100644
--- a/gcc/config/mcore/mcore.c
+++ b/gcc/config/mcore/mcore.c
@@ -28,6 +28,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "emit-rtl.h"
 #include "diagnostic-core.h"
 #include "stor-layout.h"
diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c
index 15ceac0b346..2cdd24056a5 100644
--- a/gcc/config/microblaze/microblaze.c
+++ b/gcc/config/microblaze/microblaze.c
@@ -26,6 +26,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index 85842551b0d..b48dfa0dc71 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -119,7 +119,7 @@
 
 ;; Initialization of a vector.
 
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><unitmode>"
   [(set (match_operand:VWHB 0 "register_operand")
 	(match_operand 1 ""))]
   "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index c80be471026..87d889d7296 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -231,7 +231,7 @@
    (V4SI  "uimm5")
    (V2DI  "uimm6")])
 
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><unitmode>"
   [(match_operand:MSA 0 "register_operand")
    (match_operand:MSA 1 "")]
   "ISA_HAS_MSA"
@@ -311,7 +311,7 @@
   DONE;
 })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><unitmode>"
   [(match_operand:<UNITMODE> 0 "register_operand")
    (match_operand:IMSA 1 "register_operand")
    (match_operand 2 "const_<indeximm>_operand")]
@@ -329,7 +329,7 @@
   DONE;
 })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><unitmode>"
   [(match_operand:<UNITMODE> 0 "register_operand")
    (match_operand:FMSA 1 "register_operand")
    (match_operand 2 "const_<indeximm>_operand")]
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 64733984170..81820b13b11 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -254,7 +254,7 @@
 })
 
 ; vec_init
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "register_operand")
    (match_operand:V2SF 1 "")]
   "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
@@ -282,7 +282,7 @@
 ;; emulated.  There is no other way to get a vector mode bitfield extract
 ;; currently.
 
-(define_insn "vec_extractv2sf"
+(define_insn "vec_extractv2sfsf"
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(vec_select:SF (match_operand:V2SF 1 "register_operand" "f")
 		       (parallel
@@ -379,7 +379,7 @@
     rtx temp = gen_reg_rtx (V2SFmode);
     emit_insn (gen_mips_addr_ps (temp, operands[1], operands[1]));
     rtx lane = BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx;
-    emit_insn (gen_vec_extractv2sf (operands[0], temp, lane));
+    emit_insn (gen_vec_extractv2sfsf (operands[0], temp, lane));
     DONE;
   })
 
@@ -757,7 +757,7 @@
   rtx temp = gen_reg_rtx (V2SFmode);
   mips_expand_vec_reduc (temp, operands[1], gen_sminv2sf3);
   rtx lane = BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx;
-  emit_insn (gen_vec_extractv2sf (operands[0], temp, lane));
+  emit_insn (gen_vec_extractv2sfsf (operands[0], temp, lane));
   DONE;
 })
 
@@ -769,6 +769,6 @@
   rtx temp = gen_reg_rtx (V2SFmode);
   mips_expand_vec_reduc (temp, operands[1], gen_smaxv2sf3);
   rtx lane = BYTES_BIG_ENDIAN ? const1_rtx : const0_rtx;
-  emit_insn (gen_vec_extractv2sf (operands[0], temp, lane));
+  emit_insn (gen_vec_extractv2sfsf (operands[0], temp, lane));
   DONE;
 })
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 6bfd86a07af..d2737a6ee80 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 971af6f8e09..f45c3eb98ce 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -917,6 +917,11 @@
 			    (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
 			    (V2DF "DF")])
 
+;; As above, but in lower case.
+(define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf") (V4SF "sf")
+			    (V16QI "qi") (V8QI "qi") (V8HI "hi") (V4HI "hi")
+			    (V4SI "si") (V2SI "si") (V2DI "di") (V2DF "df")])
+
 ;; This attribute gives the integer mode that has the same size as a
 ;; fixed-point mode.
 (define_mode_attr IMODE [(QQ "QI") (HQ "HI") (SQ "SI") (DQ "DI")
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
index 9849c19e076..6ca2fd9441c 100644
--- a/gcc/config/mmix/mmix.c
+++ b/gcc/config/mmix/mmix.c
@@ -25,6 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index 301207fa66a..f46caac7e94 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "cfghooks.h"
 #include "cfgloop.h"
 #include "df.h"
diff --git a/gcc/config/moxie/moxie.c b/gcc/config/moxie/moxie.c
index 70d6d7e2eaf..19cd83f5193 100644
--- a/gcc/config/moxie/moxie.c
+++ b/gcc/config/moxie/moxie.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "regs.h"
 #include "memmodel.h"
diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index 6acab1e70cb..1ab79a722ed 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "gimple-expr.h"
 #include "df.h"
 #include "memmodel.h"
diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c
index 29e94d004c6..7d7b9e27ca6 100644
--- a/gcc/config/nds32/nds32-isr.c
+++ b/gcc/config/nds32/nds32-isr.c
@@ -27,6 +27,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "diagnostic-core.h"
 #include "output.h"
 
diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c
index 705d223e496..14310de8672 100644
--- a/gcc/config/nds32/nds32.c
+++ b/gcc/config/nds32/nds32.c
@@ -27,6 +27,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/nios2/nios2.c b/gcc/config/nios2/nios2.c
index 2fc9a080402..884b1dc367e 100644
--- a/gcc/config/nios2/nios2.c
+++ b/gcc/config/nios2/nios2.c
@@ -27,6 +27,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 208b11555f2..8babac75bc3 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -62,6 +62,7 @@
 #include "internal-fn.h"
 #include "gimple-iterator.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "tree-vrp.h"
 #include "tree-ssa-operands.h"
 #include "tree-ssanames.h"
@@ -180,6 +181,10 @@ nvptx_option_override (void)
   if (!global_options_set.x_flag_no_common)
     flag_no_common = 1;
 
+  /* The patch area requires nops, which we don't have.  */
+  if (function_entry_patch_area_size > 0)
+    sorry ("not generating patch area, nops not supported");
+
   /* Assumes that it will see only hard registers.  */
   flag_var_tracking = 0;
 
@@ -207,17 +212,6 @@ nvptx_option_override (void)
     target_flags |= MASK_SOFT_STACK | MASK_UNIFORM_SIMT;
 }
 
-/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE.  */
-
-static void
-nvptx_override_options_after_change (void)
-{
-  /* This is a workaround for PR81430 - nvptx acceleration compilation broken
-     because of running pass_partition_blocks.  This should be dealt with in the
-     common code, not in the target.  */
-  flag_reorder_blocks_and_partition = 0;
-}
-
 /* Return a ptx type for MODE.  If PROMOTE, then use .u32 for QImode to
    deal with ptx ideosyncracies.  */
 
@@ -5072,7 +5066,9 @@ nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
   *gsi = gsi_for_stmt (gsi_stmt (*gsi));
 
   post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
+  post_edge->probability = profile_probability::even ();
   edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
+  loop_edge->probability = profile_probability::even ();
   set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
   set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
 
@@ -5145,7 +5141,9 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
   
   /* Create the lock loop ... */
   locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
-  make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
+  locked_edge->probability = profile_probability::even ();
+  edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
+  loop_edge->probability = profile_probability::even ();
   set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
   set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
 
@@ -5518,9 +5516,6 @@ nvptx_data_alignment (const_tree type, unsigned int basic_align)
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
 
-#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
-#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE nvptx_override_options_after_change
-
 #undef TARGET_ATTRIBUTE_TABLE
 #define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
 
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 2a78018650c..52f76cfd5f1 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c
index c2ce6e8abe5..fbbb34390f4 100644
--- a/gcc/config/pdp11/pdp11.c
+++ b/gcc/config/pdp11/pdp11.c
@@ -25,6 +25,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/powerpcspe/altivec.md b/gcc/config/powerpcspe/altivec.md
index 649f1810d16..e98309a8ad4 100644
--- a/gcc/config/powerpcspe/altivec.md
+++ b/gcc/config/powerpcspe/altivec.md
@@ -301,7 +301,7 @@
   for (i = 0; i < num_elements; i++)
     RTVEC_ELT (v, i) = constm1_rtx;
 
-  emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v)));
+  emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v)));
   emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest)));
   DONE;
 })
@@ -2222,7 +2222,7 @@
   RTVEC_ELT (v, 2) = GEN_INT (mask_val);
   RTVEC_ELT (v, 3) = GEN_INT (mask_val);
 
-  emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v)));
+  emit_insn (gen_vec_initv4sisi (mask, gen_rtx_PARALLEL (V4SImode, v)));
   emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2],
 				     gen_lowpart (V4SFmode, mask)));
   DONE;
@@ -3014,7 +3014,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  0);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3050,7 +3050,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ?  6 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3086,7 +3086,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  8);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3122,7 +3122,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3363,7 +3363,7 @@
      = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i);
   }
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2]));
   emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2]));
   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask));
diff --git a/gcc/config/powerpcspe/paired.md b/gcc/config/powerpcspe/paired.md
index 09123eec887..e12f07fc9b8 100644
--- a/gcc/config/powerpcspe/paired.md
+++ b/gcc/config/powerpcspe/paired.md
@@ -377,7 +377,7 @@
   "ps_muls1 %0, %1, %2"
   [(set_attr "type" "fp")])
 
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
    (match_operand 1 "" "")]
   "TARGET_PAIRED_FLOAT"
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index b94afd5ca2e..f2840894fc1 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -31,6 +31,7 @@
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
diff --git a/gcc/config/powerpcspe/vector.md b/gcc/config/powerpcspe/vector.md
index e6489a861cd..6c43186f8aa 100644
--- a/gcc/config/powerpcspe/vector.md
+++ b/gcc/config/powerpcspe/vector.md
@@ -74,6 +74,16 @@
 			    (V1TI  "TI")
 			    (TI    "TI")])
 
+;; As above, but in lower case.
+(define_mode_attr VEC_base_l [(V16QI "qi")
+			      (V8HI  "hi")
+			      (V4SI  "si")
+			      (V2DI  "di")
+			      (V4SF  "sf")
+			      (V2DF  "df")
+			      (V1TI  "ti")
+			      (TI    "ti")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VEC_int [(V4SF  "v4si")
 			   (V2DF  "v2di")])
@@ -1017,7 +1027,7 @@
 
 
 ;; Vector initialization, set, extract
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><VEC_base_l>"
   [(match_operand:VEC_E 0 "vlogical_operand" "")
    (match_operand:VEC_E 1 "" "")]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
@@ -1036,7 +1046,7 @@
   DONE;
 })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><VEC_base_l>"
   [(match_operand:<VEC_base> 0 "register_operand" "")
    (match_operand:VEC_E 1 "vlogical_operand" "")
    (match_operand 2 "const_int_operand" "")]
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 57b2edbcb43..cbf2f79bc4d 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "alias.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "varasm.h"
 #include "stor-layout.h"
 #include "calls.h"
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 460775cb995..f93116230d5 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -29,6 +29,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "emit-rtl.h"
 #include "recog.h"
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 4d34a97c9ae..c8e508cf0a0 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -355,6 +355,7 @@
 #define vec_vsx_ld __builtin_vec_vsx_ld
 #define vec_vsx_st __builtin_vec_vsx_st
 #define vec_xl __builtin_vec_vsx_ld
+#define vec_xl_be __builtin_vec_xl_be
 #define vec_xst __builtin_vec_vsx_st
 
 /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index c14cb92f0f3..4077afdadb6 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -311,7 +311,7 @@
   for (i = 0; i < num_elements; i++)
     RTVEC_ELT (v, i) = constm1_rtx;
 
-  emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v)));
+  emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v)));
   emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest)));
   DONE;
 })
@@ -2267,7 +2267,7 @@
   RTVEC_ELT (v, 2) = GEN_INT (mask_val);
   RTVEC_ELT (v, 3) = GEN_INT (mask_val);
 
-  emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v)));
+  emit_insn (gen_vec_initv4sisi (mask, gen_rtx_PARALLEL (V4SImode, v)));
   emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2],
 				     gen_lowpart (V4SFmode, mask)));
   DONE;
@@ -3409,7 +3409,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  0);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3445,7 +3445,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ?  6 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3481,7 +3481,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  8);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3517,7 +3517,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3758,7 +3758,7 @@
      = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i);
   }
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2]));
   emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2]));
   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask));
@@ -3804,7 +3804,7 @@
       RTVEC_ELT (v, i + j * size)
 	= GEN_INT (i + (num_elements - 1 - j) * size);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
 	     operands[1], mask));
   DONE;
diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md
index c9f95867c0f..b0aa329d7b8 100644
--- a/gcc/config/rs6000/paired.md
+++ b/gcc/config/rs6000/paired.md
@@ -377,7 +377,7 @@
   "ps_muls1 %0, %1, %2"
   [(set_attr "type" "fp")])
 
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
    (match_operand 1 "" "")]
   "TARGET_PAIRED_FLOAT"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a043e70f93b..850164a0987 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1735,6 +1735,14 @@ BU_VSX_X (LXVW4X_V4SF,	      "lxvw4x_v4sf",	MEM)
 BU_VSX_X (LXVW4X_V4SI,        "lxvw4x_v4si",	MEM)
 BU_VSX_X (LXVW4X_V8HI,        "lxvw4x_v8hi",	MEM)
 BU_VSX_X (LXVW4X_V16QI,	      "lxvw4x_v16qi",	MEM)
+
+BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM)
+BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM)
+BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM)
+BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM)
+BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM)
+BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM)
+
 BU_VSX_X (STXSDX,	      "stxsdx",		MEM)
 BU_VSX_X (STXVD2X_V1TI,	      "stxvd2x_v1ti",	MEM)
 BU_VSX_X (STXVD2X_V2DF,	      "stxvd2x_v2df",	MEM)
@@ -1835,6 +1843,7 @@ BU_VSX_OVERLOAD_1 (VUNSIGNEDO,  "vunsignedo")
 BU_VSX_OVERLOAD_X (LD,	     "ld")
 BU_VSX_OVERLOAD_X (ST,	     "st")
 BU_VSX_OVERLOAD_X (XL,	     "xl")
+BU_VSX_OVERLOAD_X (XL_BE,    "xl_be")
 BU_VSX_OVERLOAD_X (XST,	     "xst")
 
 /* 2 argument CMPB instructions added in ISA 2.05. */
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 1359099366d..11febbb4d46 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3077,6 +3077,26 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     ~RS6000_BTI_unsigned_V16QI, 0 },
   { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
     RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
@@ -5852,6 +5872,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       tree arg1 = (*arglist)[1];
       tree arg1_type = TREE_TYPE (arg1);
 
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
       /* Power9 instructions provide the most efficient implementation of
 	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
 	 or SFmode or DFmode.  */
@@ -5861,12 +5887,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
 	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
 	{
-	  /* Both arguments must be vectors and the types must be compatible.  */
-	  if (TREE_CODE (arg0_type) != VECTOR_TYPE)
-	    goto bad;
-	  if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
-	    goto bad;
-
 	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
 	    {
 	      /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb),
@@ -5931,8 +5951,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	 __int128) and the types must be compatible.  */
       if (TREE_CODE (arg0_type) != VECTOR_TYPE)
 	goto bad;
-      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) ||
-	  !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
 	goto bad;
 
       switch (TYPE_MODE (TREE_TYPE (arg0_type)))
@@ -6014,8 +6034,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	 __int128) and the types must be compatible.  */
       if (TREE_CODE (arg0_type) != VECTOR_TYPE)
 	goto bad;
-      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) ||
-	  !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
 	goto bad;
 
       switch (TYPE_MODE (TREE_TYPE (arg0_type)))
@@ -6464,6 +6484,9 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 
       /* Strip qualifiers like "const" from the pointer arg.  */
       tree arg1_type = TREE_TYPE (arg1);
+      if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
+	goto bad;
+
       tree inner_type = TREE_TYPE (arg1_type);
       if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
 	{
@@ -6552,11 +6575,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	      arg2 = build1 (ADDR_EXPR, arg2_type, arg2_elt0);
 	    }
 
-	  tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
-				       arg2, arg1);
-	  tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr,
-					  build_int_cst (arg2_type, -16));
-
 	  /* Find the built-in to make sure a compatible one exists; if not
 	     we fall back to default handling to get the error message.  */
 	  for (desc = altivec_overloaded_builtins;
@@ -6569,6 +6587,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 		&& rs6000_builtin_type_compatible (TREE_TYPE (arg2),
 						   desc->op3))
 	      {
+		tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
+					     arg2, arg1);
+		tree aligned
+		  = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type,
+				     addr, build_int_cst (arg2_type, -16));
+
 		tree arg0_type = TREE_TYPE (arg0);
 		if (TYPE_MODE (arg0_type) == V2DImode)
 		  /* Type-based aliasing analysis thinks vector long
@@ -6694,8 +6718,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    overloaded_code = P6_BUILTIN_CMPB_32;
 	  }
 
-	while (desc->code && desc->code == fcode &&
-	       desc->overloaded_code != overloaded_code)
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
 	  desc++;
 
 	if (desc->code && (desc->code == fcode)
@@ -6741,8 +6765,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    else
 	      overloaded_code = P9V_BUILTIN_VSIEDP;
 	  }
-	while (desc->code && desc->code == fcode &&
-	       desc->overloaded_code != overloaded_code)
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
 	  desc++;
 	if (desc->code && (desc->code == fcode)
 	    && rs6000_builtin_type_compatible (types[0], desc->op1)
@@ -6778,15 +6802,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
     if (unsupported_builtin)
       {
 	const char *name = rs6000_overloaded_builtin_name (fcode);
-	error ("Builtin function %s not supported in this compiler configuration",
+	error ("builtin function %s not supported in this compiler configuration",
 	       name);
 	return error_mark_node;
       }
   }
  bad:
-    {
-      const char *name = rs6000_overloaded_builtin_name (fcode);
-      error ("invalid parameter combination for AltiVec intrinsic %s", name);
-      return error_mark_node;
-    }
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %s", name);
+    return error_mark_node;
+  }
 }
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 386a5cebc2f..74158cdd075 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -435,7 +435,7 @@ enum rs6000_reg_type {
   ALTIVEC_REG_TYPE,
   FPR_REG_TYPE,
   SPR_REG_TYPE,
-  CR_REG_TYPE,
+  CR_REG_TYPE
 };
 
 /* Map register class to register type.  */
@@ -4182,6 +4182,10 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_altivec_element_order = 0;
     }
 
+  if (!rs6000_fold_gimple)
+    fprintf (stderr,
+	     "gimple folding of rs6000 builtins has been disabled.\n");
+
   /* Add some warnings for VSX.  */
   if (TARGET_VSX)
     {
@@ -4250,7 +4254,7 @@ rs6000_option_override_internal (bool global_init_p)
 	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
 	    }
 	  else
-	    error ("Power9 target option is incompatible with -mcpu=<xxx> for "
+	    error ("power9 target option is incompatible with -mcpu=<xxx> for "
 		   "<xxx> less than power9");
 	}
       else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
@@ -14452,6 +14456,69 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
 }
 
+/* Expand a vec_xl_be load builtin.  EXP is the call; its first argument is
+   an offset that is added to the address given by its second argument.
+   ICODE is the load pattern to emit.  TARGET is used for the result when it
+   has the right mode and satisfies the pattern's predicate.  If BLK, the
+   memory reference is made in BLKmode rather than in the vector mode.
+   When the element order is not big-endian, the loaded elements are
+   reversed.  Return the result, zero if ICODE is CODE_FOR_nothing or no
+   insn could be generated, or const0_rtx if an argument is erroneous.  */
 static rtx
+altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target,
+			      bool blk)
+{
+  rtx pat, addr;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = Pmode;
+  machine_mode mode1 = Pmode;
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node || arg1 == error_mark_node)
+    return const0_rtx;
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  op1 = copy_to_mode_reg (mode1, op1);
+
+  /* A zero offset needs no address arithmetic.  */
+  if (op0 == const0_rtx)
+    addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+  else
+    {
+      op0 = copy_to_mode_reg (mode0, op0);
+      addr = gen_rtx_MEM (blk ? BLKmode : tmode,
+			  gen_rtx_PLUS (Pmode, op1, op0));
+    }
+
+  pat = GEN_FCN (icode) (target, addr);
+  if (!pat)
+    return 0;
+
+  emit_insn (pat);
+
+  /* Reverse the order of the elements if in LE mode.  */
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx sel = swap_selector_for_mode (tmode);
+      rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel),
+				  UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (target, vperm));
+    }
+  return target;
+}
+
+static rtx
 paired_expand_stv_builtin (enum insn_code icode, tree exp)
 {
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -15843,6 +15899,31 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
       /* Fall through.  */
     }
 
+  /* The XL_BE builtins always load in big-endian element order.  */
+  switch (fcode)
+    {
+    case VSX_BUILTIN_XL_BE_V2DI:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v2di, exp,
+					   target, false);
+    case VSX_BUILTIN_XL_BE_V4SI:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v4si, exp,
+					   target, false);
+    case VSX_BUILTIN_XL_BE_V8HI:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v8hi, exp,
+					   target, false);
+    case VSX_BUILTIN_XL_BE_V16QI:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v16qi, exp,
+					   target, false);
+    case VSX_BUILTIN_XL_BE_V2DF:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v2df, exp,
+					   target, false);
+    case VSX_BUILTIN_XL_BE_V4SF:
+      return altivec_expand_xl_be_builtin (CODE_FOR_vsx_load_v4sf, exp,
+					   target, false);
+    default:
+      break;
+    }
+
   *expandedp = false;
   return NULL_RTX;
 }
@@ -15958,51 +16058,51 @@ paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
 static void
 rs6000_invalid_builtin (enum rs6000_builtins fncode)
 {
-  size_t uns_fncode = (size_t)fncode;
+  size_t uns_fncode = (size_t) fncode;
   const char *name = rs6000_builtin_info[uns_fncode].name;
   HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
 
   gcc_assert (name != NULL);
   if ((fnmask & RS6000_BTM_CELL) != 0)
-    error ("Builtin function %s is only valid for the cell processor", name);
+    error ("builtin function %s is only valid for the cell processor", name);
   else if ((fnmask & RS6000_BTM_VSX) != 0)
-    error ("Builtin function %s requires the -mvsx option", name);
+    error ("builtin function %s requires the -mvsx option", name);
   else if ((fnmask & RS6000_BTM_HTM) != 0)
-    error ("Builtin function %s requires the -mhtm option", name);
+    error ("builtin function %s requires the -mhtm option", name);
   else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
-    error ("Builtin function %s requires the -maltivec option", name);
+    error ("builtin function %s requires the -maltivec option", name);
   else if ((fnmask & RS6000_BTM_PAIRED) != 0)
-    error ("Builtin function %s requires the -mpaired option", name);
+    error ("builtin function %s requires the -mpaired option", name);
   else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
 	   == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
-    error ("Builtin function %s requires the -mhard-dfp and"
+    error ("builtin function %s requires the -mhard-dfp and"
 	   " -mpower8-vector options", name);
   else if ((fnmask & RS6000_BTM_DFP) != 0)
-    error ("Builtin function %s requires the -mhard-dfp option", name);
+    error ("builtin function %s requires the -mhard-dfp option", name);
   else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
-    error ("Builtin function %s requires the -mpower8-vector option", name);
+    error ("builtin function %s requires the -mpower8-vector option", name);
   else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
 	   == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
-    error ("Builtin function %s requires the -mcpu=power9 and"
+    error ("builtin function %s requires the -mcpu=power9 and"
 	   " -m64 options", name);
   else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
-    error ("Builtin function %s requires the -mcpu=power9 option", name);
+    error ("builtin function %s requires the -mcpu=power9 option", name);
   else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
 	   == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
-    error ("Builtin function %s requires the -mcpu=power9 and"
+    error ("builtin function %s requires the -mcpu=power9 and"
 	   " -m64 options", name);
   else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
-    error ("Builtin function %s requires the -mcpu=power9 option", name);
+    error ("builtin function %s requires the -mcpu=power9 option", name);
   else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
 	   == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
-    error ("Builtin function %s requires the -mhard-float and"
+    error ("builtin function %s requires the -mhard-float and"
 	   " -mlong-double-128 options", name);
   else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
-    error ("Builtin function %s requires the -mhard-float option", name);
+    error ("builtin function %s requires the -mhard-float option", name);
   else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
-    error ("Builtin function %s requires the -mfloat128 option", name);
+    error ("builtin function %s requires the -mfloat128 option", name);
   else
-    error ("Builtin function %s is not supported with the current options",
+    error ("builtin function %s is not supported with the current options",
 	   name);
 }
 
@@ -16063,6 +16163,20 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
   tree arg0, arg1, lhs;
 
+  size_t uns_fncode = (size_t) fn_code;
+  enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
+  const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
+  const char *fn_name2 = (icode != CODE_FOR_nothing)
+			  ? get_insn_name ((int) icode)
+			  : "nothing";
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
+	     fn_code, fn_name1, fn_name2);
+
+  if (!rs6000_fold_gimple)
+    return false;
+
   /* Generic solution to prevent gimple folding of code without a LHS.  */
   if (!gimple_call_lhs (stmt))
     return false;
@@ -16422,6 +16536,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	return true;
       }
     default:
+      if (TARGET_DEBUG_BUILTIN)
+	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
+		 fn_code, fn_name1, fn_name2);
       break;
     }
 
@@ -16454,9 +16571,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     {
       enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
       const char *name1 = rs6000_builtin_info[uns_fcode].name;
-      const char *name2 = ((icode != CODE_FOR_nothing)
-			   ? get_insn_name ((int)icode)
-			   : "nothing");
+      const char *name2 = (icode != CODE_FOR_nothing)
+			   ? get_insn_name ((int) icode)
+			   : "nothing";
       const char *name3;
 
       switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
@@ -16475,7 +16592,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       fprintf (stderr,
 	       "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
 	       (name1) ? name1 : "---", fcode,
-	       (name2) ? name2 : "---", (int)icode,
+	       (name2) ? name2 : "---", (int) icode,
 	       name3,
 	       func_valid_p ? "" : ", not valid");
     }	     
@@ -17303,6 +17420,22 @@ altivec_init_builtins (void)
   def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
 	       VSX_BUILTIN_ST_ELEMREV_V4SI);
 
+  /* NOTE(review): these are registered as "le_be" while the matching
+     entries in rs6000-builtin.def are named "xl_be" -- confirm that the
+     spelling used here is intended.  */
+  def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V8HI);
+  def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V4SI);
+  def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V2DI);
+  def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V4SF);
+  def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V2DF);
+  def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid,
+	       VSX_BUILTIN_XL_BE_V16QI);
+
   if (TARGET_P9_VECTOR)
     {
       def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
@@ -17332,6 +17462,8 @@ altivec_init_builtins (void)
 	       VSX_BUILTIN_VEC_ST);
   def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
 	       VSX_BUILTIN_VEC_XL);
+  def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
+	       VSX_BUILTIN_VEC_XL_BE);
   def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
 	       VSX_BUILTIN_VEC_XST);
 
@@ -24320,6 +24452,21 @@ rs6000_savres_strategy (rs6000_stack_t *info,
   else if (!lr_save_p && info->first_gp_reg_save > 29)
     strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
 
+  /* We can only use save multiple if we need to save all the registers from
+     first_gp_reg_save.  Otherwise, the CFI gets messed up (we save some
+     register we do not restore).  */
+  if (strategy & SAVE_MULTIPLE)
+    {
+      int i;
+
+      for (i = info->first_gp_reg_save; i < 32; i++)
+	if (fixed_reg_p (i) || !save_reg_p (i))
+	  {
+	    strategy &= ~SAVE_MULTIPLE;
+	    break;
+	  }
+    }
+
   /* We can only use load multiple or the out-of-line routines to
      restore gprs if we've saved all the registers from
      first_gp_reg_save.  Otherwise, we risk loading garbage.
@@ -32161,7 +32308,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
 	rtx fnmem, fn_reg, toc_reg;
 
 	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
-	  error ("You cannot take the address of a nested function if you use "
+	  error ("you cannot take the address of a nested function if you use "
 		 "the -mno-pointers-to-nested-functions option.");
 
 	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index e94aa07bc7a..1ee84cb4dc5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -148,6 +148,10 @@ maltivec=be
 Target Report RejectNegative Var(rs6000_altivec_element_order, 2)
 Generate AltiVec instructions using big-endian element order.
 
+mfold-gimple
+Target Report Var(rs6000_fold_gimple) Init(1)
+Enable early gimple folding of builtins.
+
 mhard-dfp
 Target Report Mask(DFP) Var(rs6000_isa_flags)
 Use decimal floating point instructions.
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index de386291a51..cbee89140dd 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -757,24 +757,34 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
 #define CRTOFFLOADEND ""
 #endif
 
-#ifdef HAVE_LD_PIE
-#define	STARTFILE_LINUX_SPEC "\
-%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
-%{mnewlib:ecrti.o%s;:crti.o%s} \
-%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \
-" CRTOFFLOADBEGIN
-#else
-#define	STARTFILE_LINUX_SPEC "\
-%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \
-%{mnewlib:ecrti.o%s;:crti.o%s} \
-%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \
-" CRTOFFLOADBEGIN
-#endif
-
-#define	ENDFILE_LINUX_SPEC "\
-%{shared|pie:crtendS.o%s;:crtend.o%s} \
-%{mnewlib:ecrtn.o%s;:crtn.o%s} \
-" CRTOFFLOADEND
+/* STARTFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_STARTFILE_SPEC
+   but with the mnewlib ecrti.o%s selection substituted for crti.o%s.  */
+#define	STARTFILE_LINUX_SPEC \
+  "%{shared:; \
+     pg|p|profile:gcrt1.o%s; \
+     static:crt1.o%s; \
+     " PIE_SPEC ":Scrt1.o%s; \
+     :crt1.o%s} \
+   %{mnewlib:ecrti.o%s;:crti.o%s} \
+   %{static:crtbeginT.o%s; \
+     shared|" PIE_SPEC ":crtbeginS.o%s; \
+     :crtbegin.o%s} \
+   %{fvtable-verify=none:%s; \
+     fvtable-verify=preinit:vtv_start_preinit.o%s; \
+     fvtable-verify=std:vtv_start.o%s} \
+   " CRTOFFLOADBEGIN
+
+/* ENDFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_ENDFILE_SPEC
+   but with the mnewlib ecrtn.o%s selection substituted for crtn.o%s.  */
+#define ENDFILE_LINUX_SPEC \
+  "%{fvtable-verify=none:%s; \
+     fvtable-verify=preinit:vtv_end_preinit.o%s; \
+     fvtable-verify=std:vtv_end.o%s} \
+   %{static:crtend.o%s; \
+     shared|" PIE_SPEC ":crtendS.o%s; \
+     :crtend.o%s} \
+   %{mnewlib:ecrtn.o%s;:crtn.o%s} \
+   " CRTOFFLOADEND
 
 #define LINK_START_LINUX_SPEC ""
 
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index a3d53e7f439..d6f2fd13fcb 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -74,6 +74,16 @@
 			    (V1TI  "TI")
 			    (TI    "TI")])
 
+;; As above, but in lower case
+(define_mode_attr VEC_base_l [(V16QI "qi")
+			      (V8HI  "hi")
+			      (V4SI  "si")
+			      (V2DI  "di")
+			      (V4SF  "sf")
+			      (V2DF  "df")
+			      (V1TI  "ti")
+			      (TI    "ti")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VEC_int [(V4SF  "v4si")
 			   (V2DF  "v2di")])
@@ -1016,7 +1026,7 @@
 
 
 ;; Vector initialization, set, extract
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><VEC_base_l>"
   [(match_operand:VEC_E 0 "vlogical_operand" "")
    (match_operand:VEC_E 1 "" "")]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
@@ -1035,7 +1045,7 @@
   DONE;
 })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><VEC_base_l>"
   [(match_operand:<VEC_base> 0 "register_operand" "")
    (match_operand:VEC_E 1 "vlogical_operand" "")
    (match_operand 2 "const_int_operand" "")]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 2937499c4a6..510294d97eb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2364,10 +2364,10 @@
 
 ;; Build a V2DF/V2DI vector from two scalars
 (define_insn "vsx_concat_<mode>"
-  [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
 	(vec_concat:VSX_D
-	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
-	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
+	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
+	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
   if (which_alternative == 0)
@@ -2385,6 +2385,80 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; Combiner patterns to allow creating XXPERMDI's to access either double
+;; word element in a vector register.
+(define_insn "*vsx_concat_<mode>_1"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
+	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword = INTVAL (operands[2]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[4] = GEN_INT (2 * dword);
+      return "xxpermdi %x0,%x1,%x3,%4";
+    }
+  else
+    {
+      operands[4] = GEN_INT (!dword);
+      return "xxpermdi %x0,%x3,%x1,%4";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_concat_<mode>_2"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword = INTVAL (operands[3]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[4] = GEN_INT (dword);
+      return "xxpermdi %x0,%x1,%x2,%4";
+    }
+  else
+    {
+      operands[4] = GEN_INT (2 * !dword);
+      return "xxpermdi %x0,%x2,%x1,%4";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_concat_<mode>_3"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
+  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[5] = GEN_INT ((2 * dword1) + dword2);
+      return "xxpermdi %x0,%x1,%x3,%5";
+    }
+  else
+    {
+      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
+      return "xxpermdi %x0,%x3,%x1,%5";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -2585,25 +2659,35 @@
   DONE;
 })
 
-;; Set the element of a V2DI/VD2F mode
-(define_insn "vsx_set_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
-	(unspec:VSX_D
-	 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
-	  (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
-	  (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
-	 UNSPEC_VSX_SET))]
+;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
+(define_expand "vsx_set_<mode>"
+  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
+   (use (match_operand:VSX_D 1 "vsx_register_operand"))
+   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
+   (use (match_operand:QI 3 "const_0_to_1_operand"))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
-  if (INTVAL (operands[3]) == idx_first)
-    return \"xxpermdi %x0,%x2,%x1,1\";
-  else if (INTVAL (operands[3]) == 1 - idx_first)
-    return \"xxpermdi %x0,%x1,%x2,0\";
+  rtx dest = operands[0];
+  rtx vec_reg = operands[1];
+  rtx value = operands[2];
+  rtx ele = operands[3];
+  rtx tmp = gen_reg_rtx (<VS_scalar>mode);
+
+  if (ele == const0_rtx)
+    {
+      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
+      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
+      DONE;
+    }
+  else if (ele == const1_rtx)
+    {
+      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
+      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
+      DONE;
+    }
   else
     gcc_unreachable ();
-}
-  [(set_attr "type" "vecperm")])
+})
 
 ;; Extract a DF/DI element from V2DF/V2DI
 ;; Optimize cases were we can do a simple or direct move.
@@ -4523,7 +4607,7 @@
      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
      src half words 0,1,2,3 for the conversion instruction.  */
   v = gen_rtvec_v (16, rvals);
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
 					  operands[1], mask));
   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
@@ -4552,7 +4636,7 @@
      inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
      src half words 4,5,6,7 for the conversion instruction.  */
   v = gen_rtvec_v (16, rvals);
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
 					  operands[1], mask));
   emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index 9d512b8959b..daae27b404a 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -29,6 +29,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "cfghooks.h"
 #include "df.h"
 #include "memmodel.h"
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 9046cb08f94..ddcf370cb23 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -450,12 +450,12 @@ OB_DEF_VAR (s390_vec_extract_u64,       s390_vlgvg,         0,
 OB_DEF_VAR (s390_vec_extract_b64,       s390_vlgvg,         0,                  O2_ELEM,            BT_OV_ULONGLONG_BV2DI_INT)
 OB_DEF_VAR (s390_vec_extract_dbl,       s390_vlgvg_dbl,     0,                  O2_ELEM,            BT_OV_DBL_V2DF_INT)                      /* vlgvg */
 
-B_DEF      (s390_vlgvb,                 vec_extractv16qi,   0,                  B_VX,               O2_ELEM,            BT_FN_UCHAR_UV16QI_INT)
-B_DEF      (s390_vlgvh,                 vec_extractv8hi,    0,                  B_VX,               O2_ELEM,            BT_FN_USHORT_UV8HI_INT)
-B_DEF      (s390_vlgvf,                 vec_extractv4si,    0,                  B_VX,               O2_ELEM,            BT_FN_UINT_UV4SI_INT)
-B_DEF      (s390_vlgvf_flt,             vec_extractv4sf,    0,                  B_INT | B_VXE,      O2_ELEM,            BT_FN_FLT_V4SF_INT)
-B_DEF      (s390_vlgvg,                 vec_extractv2di,    0,                  B_VX,               O2_ELEM,            BT_FN_ULONGLONG_UV2DI_INT)
-B_DEF      (s390_vlgvg_dbl,             vec_extractv2df,    0,                  B_INT | B_VX,       O2_ELEM,            BT_FN_DBL_V2DF_INT)
+B_DEF      (s390_vlgvb,                 vec_extractv16qiqi, 0,                  B_VX,               O2_ELEM,            BT_FN_UCHAR_UV16QI_INT)
+B_DEF      (s390_vlgvh,                 vec_extractv8hihi,  0,                  B_VX,               O2_ELEM,            BT_FN_USHORT_UV8HI_INT)
+B_DEF      (s390_vlgvf,                 vec_extractv4sisi,  0,                  B_VX,               O2_ELEM,            BT_FN_UINT_UV4SI_INT)
+B_DEF      (s390_vlgvf_flt,             vec_extractv4sfsf,  0,                  B_INT | B_VXE,      O2_ELEM,            BT_FN_FLT_V4SF_INT)
+B_DEF      (s390_vlgvg,                 vec_extractv2didi,  0,                  B_VX,               O2_ELEM,            BT_FN_ULONGLONG_UV2DI_INT)
+B_DEF      (s390_vlgvg_dbl,             vec_extractv2dfdf,  0,                  B_INT | B_VX,       O2_ELEM,            BT_FN_DBL_V2DF_INT)
 
 OB_DEF     (s390_vec_insert_and_zero,   s390_vec_insert_and_zero_s8,s390_vec_insert_and_zero_dbl,B_VX,BT_FN_OV4SI_INTCONSTPTR)
 OB_DEF_VAR (s390_vec_insert_and_zero_s8,s390_vllezb,        0,                  0,                  BT_OV_V16QI_SCHARCONSTPTR)
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 77b0e7f5404..deced953d75 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
@@ -5795,7 +5796,7 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
 		    REG_BR_PROB,
 		    profile_probability::very_likely ().to_reg_br_prob_note ());
-  emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
+  emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
 
   /* If the string pointer wasn't aligned we have loaded less then 16
      bytes and the remaining bytes got filled with zeros (by vll).
@@ -5853,7 +5854,7 @@ s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
-  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
+  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
   /* gpos is the byte index if a zero was found and 16 otherwise.
      So if it is lower than the loaded bytes we have a hit.  */
@@ -5931,7 +5932,7 @@ s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
 		      1, OPTAB_DIRECT);
 
-  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
+  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
 
   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 2952893834a..3cf79896720 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -90,6 +90,17 @@
 			  (V1DF "DF") (V2DF "DF")
 			  (V1TF "TF") (TF "TF")])
 
+; Like the mode attribute above, but in lower case for use in pattern names.
+(define_mode_attr non_vec_l[(V1QI "qi") (V2QI "qi") (V4QI "qi") (V8QI "qi")
+			    (V16QI "qi")
+			    (V1HI "hi") (V2HI "hi") (V4HI "hi") (V8HI "hi")
+			    (V1SI "si") (V2SI "si") (V4SI "si")
+			    (V1DI "di") (V2DI "di")
+			    (V1TI "ti") (TI "ti")
+			    (V1SF "sf") (V2SF "sf") (V4SF "sf")
+			    (V1DF "df") (V2DF "df")
+			    (V1TF "tf") (TF "tf")])
+
 ; The instruction suffix for integer instructions and instructions
 ; which do not care about whether it is floating point or integer.
 (define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
@@ -453,7 +464,7 @@
 ; FIXME: Support also vector mode operands for 0
 ; FIXME: This should be (vec_select ..) or something but it does only allow constant selectors :(
 ; This is used via RTL standard name as well as for expanding the builtin
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><non_vec_l>"
   [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "")
 	(unspec:<non_vec> [(match_operand:V  1 "register_operand" "")
 			   (match_operand:SI 2 "nonmemory_operand" "")]
@@ -485,7 +496,7 @@
   "vlgv<bhfgq>\t%0,%v1,%Y3(%2)"
   [(set_attr "op_type" "VRS")])
 
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><non_vec_l>"
   [(match_operand:V_128 0 "register_operand" "")
    (match_operand:V_128 1 "nonmemory_operand" "")]
   "TARGET_VX"
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 6f01dcb700c..c31776ffef8 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "emit-rtl.h"
 #include "recog.h"
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
index cdabf24e24b..b8ef3409942 100644
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "diagnostic-core.h"
 #include "varasm.h"
 #include "output.h"
diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h
index b8398d082a9..bf5203b8c16 100644
--- a/gcc/config/sol2.h
+++ b/gcc/config/sol2.h
@@ -174,9 +174,9 @@ along with GCC; see the file COPYING3.  If not see
 			    %{!ansi:values-Xa.o%s}"
 
 #if defined(HAVE_LD_PIE) && defined(HAVE_SOLARIS_CRTS)
-#define STARTFILE_CRTBEGIN_SPEC "%{shared:crtbeginS.o%s} \
-				 %{" PIE_SPEC ":crtbeginS.o%s} \
-				 %{" NO_PIE_SPEC ":crtbegin.o%s}"
+#define STARTFILE_CRTBEGIN_SPEC "%{static:crtbegin.o%s; \
+				   shared|" PIE_SPEC ":crtbeginS.o%s; \
+				   :crtbegin.o%s}"
 #else
 #define STARTFILE_CRTBEGIN_SPEC	"crtbegin.o%s"
 #endif
@@ -224,9 +224,9 @@ along with GCC; see the file COPYING3.  If not see
 #endif
 
 #if defined(HAVE_LD_PIE) && defined(HAVE_SOLARIS_CRTS)
-#define ENDFILE_CRTEND_SPEC "%{shared:crtendS.o%s;: \
-			       %{" PIE_SPEC ":crtendS.o%s} \
-			       %{" NO_PIE_SPEC ":crtend.o%s}}"
+#define ENDFILE_CRTEND_SPEC "%{static:crtend.o%s; \
+			       shared|" PIE_SPEC ":crtendS.o%s; \
+			       :crtend.o%s}"
 #else
 #define ENDFILE_CRTEND_SPEC "crtend.o%s"
 #endif
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 59761aac310..d494ecf2410 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 751bacdbcac..925b49e0394 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -8621,6 +8621,8 @@ visl")
 (define_mode_attr vfptype [(V1SI "single") (V2HI "single") (V4QI "single")
 			   (V1DI "double") (V2SI "double") (V4HI "double")
 			   (V8QI "double")])
+(define_mode_attr veltmode [(V1SI "si") (V2HI "hi") (V4QI "qi") (V1DI "di")
+			    (V2SI "si") (V4HI "hi") (V8QI "qi")])
 
 (define_expand "mov<VMALL:mode>"
   [(set (match_operand:VMALL 0 "nonimmediate_operand" "")
@@ -8762,7 +8764,7 @@ visl")
   DONE;
 })
 
-(define_expand "vec_init<VMALL:mode>"
+(define_expand "vec_init<VMALL:mode><VMALL:veltmode>"
   [(match_operand:VMALL 0 "register_operand" "")
    (match_operand:VMALL 1 "" "")]
   "TARGET_VIS"
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index efee614b103..b6d03d7afd4 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -28,6 +28,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
@@ -1773,7 +1774,7 @@ spu_expand_prologue (void)
 	      size_v4si = scratch_v4si;
 	    }
 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
-	  emit_insn (gen_vec_extractv4si
+	  emit_insn (gen_vec_extractv4sisi
 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
 	}
@@ -5368,7 +5369,7 @@ spu_allocate_stack (rtx op0, rtx op1)
     {
       rtx avail = gen_reg_rtx(SImode);
       rtx result = gen_reg_rtx(SImode);
-      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
+      emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
       emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
       emit_insn (gen_spu_heq (result, GEN_INT(0) ));
     }
@@ -5684,22 +5685,22 @@ spu_builtin_extract (rtx ops[])
       switch (mode)
 	{
 	case V16QImode:
-	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
 	  break;
 	case V8HImode:
-	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
 	  break;
 	case V4SFmode:
-	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
 	  break;
 	case V4SImode:
-	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
 	  break;
 	case V2DImode:
-	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
 	  break;
 	case V2DFmode:
-	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
+	  emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
 	  break;
 	default:
 	  abort ();
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 947b044844c..fd6d253378b 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -256,6 +256,13 @@
 			  (V2DI  "DI")
 			  (V4SF  "SF")
 			  (V2DF  "DF")])
+;; Like the mode attribute above, but in lower case for use in pattern names.
+(define_mode_attr inner_l [(V16QI "qi")
+			   (V8HI  "hi")
+			   (V4SI  "si")
+			   (V2DI  "di")
+			   (V4SF  "sf")
+			   (V2DF  "df")])
 (define_mode_attr vmult  [(V16QI "1")
 			  (V8HI  "2")
 			  (V4SI  "4")
@@ -4318,7 +4325,7 @@ selb\t%0,%4,%0,%3"
 ;; vector patterns
 
 ;; Vector initialization
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><inner_l>"
   [(match_operand:V 0 "register_operand" "")
    (match_operand 1 "" "")]
   ""
@@ -4347,7 +4354,7 @@ selb\t%0,%4,%0,%3"
     operands[6] = GEN_INT (size);
   })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><inner_l>"
   [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
 	(vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
 			    (parallel [(match_operand 2 "const_int_operand" "i")])))]
diff --git a/gcc/config/stormy16/stormy16.c b/gcc/config/stormy16/stormy16.c
index aee7742de89..1a362524e8d 100644
--- a/gcc/config/stormy16/stormy16.c
+++ b/gcc/config/stormy16/stormy16.c
@@ -25,6 +25,8 @@
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "gimple.h"
 #include "df.h"
 #include "memmodel.h"
diff --git a/gcc/config/tilegx/tilegx.c b/gcc/config/tilegx/tilegx.c
index dafb49daf1f..81559acfce0 100644
--- a/gcc/config/tilegx/tilegx.c
+++ b/gcc/config/tilegx/tilegx.c
@@ -30,6 +30,7 @@
 #include "df.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
diff --git a/gcc/config/tilepro/tilepro.c b/gcc/config/tilepro/tilepro.c
index 80475b959ee..f03f0670ce9 100644
--- a/gcc/config/tilepro/tilepro.c
+++ b/gcc/config/tilepro/tilepro.c
@@ -30,6 +30,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c
index eeb24aa972c..dd73c96435f 100644
--- a/gcc/config/v850/v850.c
+++ b/gcc/config/v850/v850.c
@@ -29,6 +29,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "insn-config.h"
 #include "regs.h"
 #include "emit-rtl.h"
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index 864eaeb8531..fad4849bc5a 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "rtl.h"
 #include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "df.h"
 #include "memmodel.h"
 #include "tm_p.h"
diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c
index e5d843e8d4c..2c5b6734ae0 100644
--- a/gcc/config/visium/visium.c
+++ b/gcc/config/visium/visium.c
@@ -30,6 +30,7 @@
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "expmed.h"
 #include "optabs.h"
 #include "regs.h"
diff --git a/gcc/config/vxworksae.h b/gcc/config/vxworksae.h
index 7e65907252a..3b0b6529b8a 100644
--- a/gcc/config/vxworksae.h
+++ b/gcc/config/vxworksae.h
@@ -68,6 +68,10 @@ along with GCC; see the file COPYING3.  If not see
   while (0)
 
 /* Do VxWorks-specific parts of TARGET_OPTION_OVERRIDE.  */
+
+/* None of the VxWorks AE/653/MILS ports to date has native TLS support.  */
+#define VXWORKS_HAVE_TLS 0
+
 #undef VXWORKS_OVERRIDE_OPTIONS
 #define VXWORKS_OVERRIDE_OPTIONS vxworks_override_options ()
 extern void vxworks_override_options (void);
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
index cf9a3a79388..7c36e68b251 100644
--- a/gcc/config/xtensa/xtensa.c
+++ b/gcc/config/xtensa/xtensa.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "memmodel.h"
 #include "tm_p.h"
 #include "stringpool.h"
+#include "attribs.h"
 #include "optabs.h"
 #include "regs.h"
 #include "emit-rtl.h"