Merge branch 'master' into vtv

Conflicts: gcc/ChangeLog gcc/cp/ChangeLog gcc/cp/decl2.c gcc/varasm.c libstdc++-v3/ChangeLog
author: Caroline Tice <cmtice@chromium.org> 2013-06-21 12:05:49 -0700
committer: Caroline Tice <cmtice@chromium.org> 2013-06-21 12:05:49 -0700
commit: 9c1705845241cdea5cf61574218edef282901fbd (patch)
tree: 021da661f8a821bc7eea277521fa13f2043879a0 /gcc/config
parent: 9bdedf4df4166718fa0a44811c643214c6880471 (diff)
parent: 8a46ed36b0449c870d0a938c1010ad610ebcf4f7 (diff)
download: gcc-9c1705845241cdea5cf61574218edef282901fbd.tar.gz
55 files changed, 4047 insertions, 1287 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bdb6b040578..12f3c3a6fe6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -149,6 +149,8 @@ bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
 bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
 			    enum machine_mode);
+char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
+char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
 bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
 bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
@@ -157,6 +159,8 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
 bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
 bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
+bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool,
+				   struct simd_immediate_info *);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
 				  enum aarch64_symbol_type *);
@@ -222,6 +226,8 @@ void aarch64_split_128bit_move (rtx, rtx);
 
 bool aarch64_split_128bit_move_p (rtx, rtx);
 
+void aarch64_split_simd_combine (rtx, rtx, rtx);
+
 void aarch64_split_simd_move (rtx, rtx);
 
 /* Check for a legitimate floating point constant for FMOV.  */
@@ -257,6 +263,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
 extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
 extern bool
 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
-
-char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 04fbdbd5837..02037f3f2cb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -409,7 +409,7 @@
      case 4: return "ins\t%0.d[0], %1";
      case 5: return "mov\t%0, %1";
      case 6:
-	return aarch64_output_simd_mov_immediate (&operands[1],
+	return aarch64_output_simd_mov_immediate (operands[1],
 						  <MODE>mode, 64);
      default: gcc_unreachable ();
      }
@@ -440,7 +440,7 @@
     case 5:
 	return "#";
     case 6:
-	return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128);
+	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
     default:
 	gcc_unreachable ();
     }
@@ -1058,9 +1058,9 @@
 	  (vec_duplicate:<VHALF> (const_int 0))))]
   "TARGET_SIMD"
   "@
-   mov\\t%d0, %d1
-   fmov\t%d0, %1
-   dup\t%d0, %1"
+   dup\\t%d0, %1.d[0]
+   fmov\\t%d0, %1
+   dup\\t%d0, %1"
   [(set_attr "v8type" "*,fmov,*")
    (set_attr "simd_type" "simd_dup,*,simd_dup")
    (set_attr "simd_mode" "<MODE>")
@@ -1190,6 +1190,104 @@
 
 ;; Widening arithmetic.
 
+(define_insn "*aarch64_<su>mlal_lo<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (plus:<VWIDE>
+          (mult:<VWIDE>
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 2 "register_operand" "w")
+                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 4 "register_operand" "w")
+                 (match_dup 3))))
+          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal_hi<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (plus:<VWIDE>
+          (mult:<VWIDE>
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 2 "register_operand" "w")
+                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 4 "register_operand" "w")
+                 (match_dup 3))))
+          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_lo<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (minus:<VWIDE>
+          (match_operand:<VWIDE> 1 "register_operand" "0")
+          (mult:<VWIDE>
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 2 "register_operand" "w")
+                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 4 "register_operand" "w")
+                 (match_dup 3))))))]
+  "TARGET_SIMD"
+  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_hi<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (minus:<VWIDE>
+          (match_operand:<VWIDE> 1 "register_operand" "0")
+          (mult:<VWIDE>
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 2 "register_operand" "w")
+                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+                 (match_operand:VQW 4 "register_operand" "w")
+                 (match_dup 3))))))]
+  "TARGET_SIMD"
+  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (plus:<VWIDE>
+          (mult:<VWIDE>
+            (ANY_EXTEND:<VWIDE>
+              (match_operand:VDW 1 "register_operand" "w"))
+            (ANY_EXTEND:<VWIDE>
+              (match_operand:VDW 2 "register_operand" "w")))
+          (match_operand:<VWIDE> 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (minus:<VWIDE>
+          (match_operand:<VWIDE> 1 "register_operand" "0")
+          (mult:<VWIDE>
+            (ANY_EXTEND:<VWIDE>
+              (match_operand:VDW 2 "register_operand" "w"))
+            (ANY_EXTEND:<VWIDE>
+              (match_operand:VDW 3 "register_operand" "w")))))]
+  "TARGET_SIMD"
+  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+  [(set_attr "simd_type" "simd_mlal")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
@@ -2218,15 +2316,29 @@
    (set_attr "simd_mode" "<MODE>")]
 )
 
-(define_insn "aarch64_combine<mode>"
+(define_insn_and_split "aarch64_combine<mode>"
   [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
         (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
 			   (match_operand:VDC 2 "register_operand" "w")))]
   "TARGET_SIMD"
-  "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]"
-  [(set_attr "simd_type" "simd_ins")
-   (set_attr "simd_mode" "<MODE>")]
-)
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "aarch64_simd_combine<mode>"
+  [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+        (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
+  (match_operand:VDC 2 "register_operand" "w")))]
+  "TARGET_SIMD"
+  {
+    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
+    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
+    DONE;
+  })
 
 ;; <su><addsub>l<q>.
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9c77888157d..527b00dbcaa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -87,6 +87,14 @@ struct aarch64_address_info {
   enum aarch64_symbol_type symbol_type;
 };
 
+struct simd_immediate_info
+{
+  rtx value;
+  int shift;
+  int element_width;
+  bool mvn;
+};
+
 /* The current code model.  */
 enum aarch64_code_model aarch64_cmodel;
 
@@ -103,8 +111,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 static void aarch64_override_options_after_change (void);
-static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
-					 int *, unsigned char *, int *, int *);
 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 static unsigned bit_count (unsigned HOST_WIDE_INT);
 static bool aarch64_const_vec_all_same_int_p (rtx,
@@ -532,9 +538,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
       {
 	rtx tmp_reg = dest;
 	if (can_create_pseudo_p ())
-	  {
-	    tmp_reg =  gen_reg_rtx (Pmode);
-	  }
+	  tmp_reg =  gen_reg_rtx (Pmode);
 	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
 	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
 	return;
@@ -696,6 +700,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src)
 	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
 }
 
+/* Split a complex SIMD combine.  */
+
+void
+aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+{
+  enum machine_mode src_mode = GET_MODE (src1);
+  enum machine_mode dst_mode = GET_MODE (dst);
+
+  gcc_assert (VECTOR_MODE_P (dst_mode));
+
+  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
+    {
+      rtx (*gen) (rtx, rtx, rtx);
+
+      switch (src_mode)
+	{
+	case V8QImode:
+	  gen = gen_aarch64_simd_combinev8qi;
+	  break;
+	case V4HImode:
+	  gen = gen_aarch64_simd_combinev4hi;
+	  break;
+	case V2SImode:
+	  gen = gen_aarch64_simd_combinev2si;
+	  break;
+	case V2SFmode:
+	  gen = gen_aarch64_simd_combinev2sf;
+	  break;
+	case DImode:
+	  gen = gen_aarch64_simd_combinedi;
+	  break;
+	case DFmode:
+	  gen = gen_aarch64_simd_combinedf;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+
+      emit_insn (gen (dst, src1, src2));
+      return;
+    }
+}
+
 /* Split a complex SIMD move.  */
 
 void
@@ -5068,6 +5115,10 @@ aarch64_classify_symbol (rtx x,
 	  return SYMBOL_SMALL_ABSOLUTE;
 
 	case AARCH64_CMODEL_TINY_PIC:
+	  if (!aarch64_symbol_binds_local_p (x))
+	    return SYMBOL_SMALL_GOT;
+	  return SYMBOL_TINY_ABSOLUTE;
+
 	case AARCH64_CMODEL_SMALL_PIC:
 	  if (!aarch64_symbol_binds_local_p (x))
 	    return SYMBOL_SMALL_GOT;
@@ -5150,8 +5201,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
   /* This could probably go away because
      we now decompose CONST_INTs according to expand_mov_immediate.  */
   if ((GET_CODE (x) == CONST_VECTOR
-       && aarch64_simd_valid_immediate (x, mode, false,
-					NULL, NULL, NULL, NULL, NULL) != -1)
+       && aarch64_simd_valid_immediate (x, mode, false, NULL))
       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
 	return !targetm.cannot_force_const_mem (mode, x);
 
@@ -5982,32 +6032,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode)
   return false;
 }
 
-/* Return quad mode as the preferred SIMD mode.  */
+/* Return appropriate SIMD container
+   for MODE within a vector of WIDTH bits.  */
 static enum machine_mode
-aarch64_preferred_simd_mode (enum machine_mode mode)
+aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
 {
+  gcc_assert (width == 64 || width == 128);
   if (TARGET_SIMD)
-    switch (mode)
-      {
-      case DFmode:
-        return V2DFmode;
-      case SFmode:
-        return V4SFmode;
-      case SImode:
-        return V4SImode;
-      case HImode:
-        return V8HImode;
-      case QImode:
-        return V16QImode;
-      case DImode:
-          return V2DImode;
-        break;
-
-      default:;
-      }
+    {
+      if (width == 128)
+	switch (mode)
+	  {
+	  case DFmode:
+	    return V2DFmode;
+	  case SFmode:
+	    return V4SFmode;
+	  case SImode:
+	    return V4SImode;
+	  case HImode:
+	    return V8HImode;
+	  case QImode:
+	    return V16QImode;
+	  case DImode:
+	    return V2DImode;
+	  default:
+	    break;
+	  }
+      else
+	switch (mode)
+	  {
+	  case SFmode:
+	    return V2SFmode;
+	  case SImode:
+	    return V2SImode;
+	  case HImode:
+	    return V4HImode;
+	  case QImode:
+	    return V8QImode;
+	  default:
+	    break;
+	  }
+    }
   return word_mode;
 }
 
+/* Return 128-bit container as the preferred SIMD mode for MODE.  */
+static enum machine_mode
+aarch64_preferred_simd_mode (enum machine_mode mode)
+{
+  return aarch64_simd_container_mode (mode, 128);
+}
+
 /* Return the bitmask of possible vector sizes for the vectorizer
    to iterate over.  */
 static unsigned int
@@ -6095,7 +6170,7 @@ aarch64_mangle_type (const_tree type)
 }
 
 /* Return the equivalent letter for size.  */
-static unsigned char
+static char
 sizetochar (int size)
 {
   switch (size)
@@ -6142,15 +6217,10 @@ aarch64_vect_float_const_representable_p (rtx x)
   return aarch64_float_const_representable_p (x0);
 }
 
-/* TODO: This function returns values similar to those
-   returned by neon_valid_immediate in gcc/config/arm/arm.c
-   but the API here is different enough that these magic numbers
-   are not used.  It should be sufficient to return true or false.  */
-static int
-aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
-			      rtx *modconst, int *elementwidth,
-			      unsigned char *elementchar,
-			      int *mvn, int *shift)
+/* Return true for valid and false for invalid.  */
+bool
+aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
+			      struct simd_immediate_info *info)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -6161,7 +6231,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
     {							\
       immtype = (CLASS);				\
       elsize = (ELSIZE);				\
-      elchar = sizetochar (elsize);			\
       eshift = (SHIFT);					\
       emvn = (NEG);					\
       break;						\
@@ -6170,36 +6239,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
   unsigned char bytes[16];
-  unsigned char elchar = 0;
   int immtype = -1, matches;
   unsigned int invmask = inverse ? 0xff : 0;
   int eshift, emvn;
 
   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
     {
-      bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
-      int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
-
-      if (!(simd_imm_zero
-	    || aarch64_vect_float_const_representable_p (op)))
-	return -1;
-
-	if (modconst)
-	  *modconst = CONST_VECTOR_ELT (op, 0);
-
-	if (elementwidth)
-	  *elementwidth = elem_width;
-
-	if (elementchar)
-	  *elementchar = sizetochar (elem_width);
+      if (! (aarch64_simd_imm_zero_p (op, mode)
+	     || aarch64_vect_float_const_representable_p (op)))
+	return false;
 
-	if (shift)
-	  *shift = 0;
+      if (info)
+	{
+	  info->value = CONST_VECTOR_ELT (op, 0);
+	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
+	  info->mvn = false;
+	  info->shift = 0;
+	}
 
-	if (simd_imm_zero)
-	  return 19;
-	else
-	  return 18;
+      return true;
     }
 
   /* Splat vector constant out into a byte vector.  */
@@ -6297,23 +6355,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
   if (immtype == -1
       || (immtype >= 12 && immtype <= 15)
       || immtype == 18)
-    return -1;
-
-
-  if (elementwidth)
-    *elementwidth = elsize;
-
-  if (elementchar)
-    *elementchar = elchar;
-
-  if (mvn)
-    *mvn = emvn;
-
-  if (shift)
-    *shift = eshift;
+    return false;
 
-  if (modconst)
+  if (info)
     {
+      info->element_width = elsize;
+      info->mvn = emvn != 0;
+      info->shift = eshift;
+
       unsigned HOST_WIDE_INT imm = 0;
 
       /* Un-invert bytes of recognized vector, if necessary.  */
@@ -6330,68 +6379,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
 	      << (i * BITS_PER_UNIT);
 
-          *modconst = GEN_INT (imm);
-        }
-      else
-        {
-          unsigned HOST_WIDE_INT imm = 0;
 
-          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
-            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+	  info->value = GEN_INT (imm);
+	}
+      else
+	{
+	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
 
 	  /* Construct 'abcdefgh' because the assembler cannot handle
-	     generic constants.  */
-	  gcc_assert (shift != NULL && mvn != NULL);
-	  if (*mvn)
+	     generic constants.	 */
+	  if (info->mvn)
 	    imm = ~imm;
-	  imm = (imm >> *shift) & 0xff;
-          *modconst = GEN_INT (imm);
-        }
+	  imm = (imm >> info->shift) & 0xff;
+	  info->value = GEN_INT (imm);
+	}
     }
 
-  return immtype;
+  return true;
 #undef CHECK
 }
 
-/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
-   (or, implicitly, MVNI) immediate.  Write back width per element
-   to *ELEMENTWIDTH, and a modified constant (whatever should be output
-   for a MOVI instruction) in *MODCONST.  */
-int
-aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
-				       rtx *modconst, int *elementwidth,
-				       unsigned char *elementchar,
-				       int *mvn, int *shift)
-{
-  rtx tmpconst;
-  int tmpwidth;
-  unsigned char tmpwidthc;
-  int tmpmvn = 0, tmpshift = 0;
-  int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
-					     &tmpwidth, &tmpwidthc,
-					     &tmpmvn, &tmpshift);
-
-  if (retval == -1)
-    return 0;
-
-  if (modconst)
-    *modconst = tmpconst;
-
-  if (elementwidth)
-    *elementwidth = tmpwidth;
-
-  if (elementchar)
-    *elementchar = tmpwidthc;
-
-  if (mvn)
-    *mvn = tmpmvn;
-
-  if (shift)
-    *shift = tmpshift;
-
-  return 1;
-}
-
 static bool
 aarch64_const_vec_all_same_int_p (rtx x,
 				  HOST_WIDE_INT minval,
@@ -6496,9 +6504,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
   gcc_assert (!VECTOR_MODE_P (mode));
   vmode = aarch64_preferred_simd_mode (mode);
   rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
-  int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
-						      NULL, NULL, NULL, NULL);
-  return retval;
+  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
 }
 
 /* Construct and return a PARALLEL RTX vector.  */
@@ -6726,8 +6732,7 @@ aarch64_simd_make_constant (rtx vals)
     gcc_unreachable ();
 
   if (const_vec != NULL_RTX
-      && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
-						NULL, NULL, NULL))
+      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
     /* Load using MOVI/MVNI.  */
     return const_vec;
   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
@@ -7285,49 +7290,78 @@ aarch64_float_const_representable_p (rtx x)
 }
 
 char*
-aarch64_output_simd_mov_immediate (rtx *const_vector,
+aarch64_output_simd_mov_immediate (rtx const_vector,
 				   enum machine_mode mode,
 				   unsigned width)
 {
-  int is_valid;
-  unsigned char widthc;
-  int lane_width_bits;
+  bool is_valid;
   static char templ[40];
-  int shift = 0, mvn = 0;
   const char *mnemonic;
   unsigned int lane_count = 0;
+  char element_char;
 
-  is_valid =
-    aarch64_simd_immediate_valid_for_move (*const_vector, mode,
-					   const_vector, &lane_width_bits,
-					   &widthc, &mvn, &shift);
+  struct simd_immediate_info info;
+
+  /* This will return true to show const_vector is legal for use as either
+     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
+     also update INFO to show how the immediate should be generated.  */
+  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
   gcc_assert (is_valid);
 
+  element_char = sizetochar (info.element_width);
+  lane_count = width / info.element_width;
+
   mode = GET_MODE_INNER (mode);
   if (mode == SFmode || mode == DFmode)
     {
-      bool zero_p =
-	aarch64_float_const_zero_rtx_p (*const_vector);
-      gcc_assert (shift == 0);
-      mnemonic = zero_p ? "movi" : "fmov";
+      gcc_assert (info.shift == 0 && ! info.mvn);
+      if (aarch64_float_const_zero_rtx_p (info.value))
+        info.value = GEN_INT (0);
+      else
+	{
+#define buf_size 20
+	  REAL_VALUE_TYPE r;
+	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
+	  char float_buf[buf_size] = {'\0'};
+	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
+#undef buf_size
+
+	  if (lane_count == 1)
+	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
+	  else
+	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
+		      lane_count, element_char, float_buf);
+	  return templ;
+	}
     }
-  else
-    mnemonic = mvn ? "mvni" : "movi";
 
-  gcc_assert (lane_width_bits != 0);
-  lane_count = width / lane_width_bits;
+  mnemonic = info.mvn ? "mvni" : "movi";
 
   if (lane_count == 1)
-    snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
-  else if (shift)
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
-	      mnemonic, lane_count, widthc, shift);
+    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+	      mnemonic, UINTVAL (info.value));
+  else if (info.shift)
+    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
+	      ", lsl %d", mnemonic, lane_count, element_char,
+	      UINTVAL (info.value), info.shift);
   else
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
-	      mnemonic, lane_count, widthc);
+    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
+	      mnemonic, lane_count, element_char, UINTVAL (info.value));
   return templ;
 }
 
+char*
+aarch64_output_scalar_simd_mov_immediate (rtx immediate,
+					  enum machine_mode mode)
+{
+  enum machine_mode vmode;
+
+  gcc_assert (!VECTOR_MODE_P (mode));
+  vmode = aarch64_simd_container_mode (mode, 64);
+  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
+  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+}
+
 /* Split operands into moves from op[1] + op[2] into op[0].  */
 
 void
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 89db09254eb..e88e5be894e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -774,17 +774,34 @@
         (match_operand:SHORT 1 "general_operand"      " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
   "(register_operand (operands[0], <MODE>mode)
     || aarch64_reg_or_zero (operands[1], <MODE>mode))"
-  "@
-   mov\\t%w0, %w1
-   mov\\t%w0, %1
-   movi\\t%0.<Vallxd>, %1
-   ldr<size>\\t%w0, %1
-   ldr\\t%<size>0, %1
-   str<size>\\t%w1, %0
-   str\\t%<size>1, %0
-   umov\\t%w0, %1.<v>[0]
-   dup\\t%0.<Vallxd>, %w1
-   dup\\t%0, %1.<v>[0]"
+{
+   switch (which_alternative)
+     {
+     case 0:
+       return "mov\t%w0, %w1";
+     case 1:
+       return "mov\t%w0, %1";
+     case 2:
+       return aarch64_output_scalar_simd_mov_immediate (operands[1],
+							<MODE>mode);
+     case 3:
+       return "ldr<size>\t%w0, %1";
+     case 4:
+       return "ldr\t%<size>0, %1";
+     case 5:
+       return "str<size>\t%w1, %0";
+     case 6:
+       return "str\t%<size>1, %0";
+     case 7:
+       return "umov\t%w0, %1.<v>[0]";
+     case 8:
+       return "dup\t%0.<Vallxd>, %w1";
+     case 9:
+       return "dup\t%0, %1.<v>[0]";
+     default:
+       gcc_unreachable ();
+     }
+}
   [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
    (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
    (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
@@ -850,7 +867,8 @@
    movi\\t%d0, %1"
   [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
    (set_attr "mode" "DI")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index d9c18e692ea..7cafc08fdd9 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -143,9 +143,8 @@
   "@internal
  A constraint that matches vector of immediates."
  (and (match_code "const_vector")
-      (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op),
-							  NULL, NULL, NULL,
-							  NULL, NULL) != 0")))
+      (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
+						 false, NULL)")))
 
 (define_constraint "Dh"
   "@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 860d4d9a187..8e40c5de5d4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -385,7 +385,8 @@
 ;; Double modes of vector modes (lower case).
 (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
 			(V2SI "v4si")  (V2SF "v4sf")
-			(SI   "v2si")  (DI   "v2di")])
+			(SI   "v2si")  (DI   "v2di")
+			(DF   "v2df")])
 
 ;; Narrowed modes for VDN.
 (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 096ef3b1f56..5f5b33e347b 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
       break;
 
     case GE:  case GT:  case GEU:  case GTU:
-      /* These must be swapped.  */
-      if (op1 != CONST0_RTX (cmp_mode))
-	{
-	  code = swap_condition (code);
-	  tem = op0, op0 = op1, op1 = tem;
-	}
+      /* These normally need swapping, but for integer zero we have
+	 special patterns that recognize swapped operands.  */
+      if (cmp_mode == DImode && op1 == const0_rtx)
+	break;
+      code = swap_condition (code);
+      tem = op0, op0 = op1, op1 = tem;
       break;
 
     default:
@@ -3068,7 +3068,8 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
   out = gen_reg_rtx (DImode);
 
   /* What's actually returned is -1,0,1, not a proper boolean value.  */
-  note = gen_rtx_UNSPEC (DImode, gen_rtvec (2, op0, op1), UNSPEC_XFLT_COMPARE);
+  note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
+  note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
   alpha_emit_xfloating_libcall (func, out, operands, 2, note);
 
   return out;
diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md
index 10da396ab66..12bbbaf9083 100644
--- a/gcc/config/arm/arm-fixed.md
+++ b/gcc/config/arm/arm-fixed.md
@@ -19,12 +19,13 @@
 ;; This file contains ARM instructions that support fixed-point operations.
 
 (define_insn "add<mode>3"
-  [(set (match_operand:FIXED 0 "s_register_operand" "=r")
-	(plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
-		    (match_operand:FIXED 2 "s_register_operand" "r")))]
+  [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+	(plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+		    (match_operand:FIXED 2 "s_register_operand" "l,r")))]
   "TARGET_32BIT"
   "add%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "yes,no")])
 
 (define_insn "add<mode>3"
   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -32,7 +33,8 @@
 		     (match_operand:ADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "sadd<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "usadd<mode>3"
   [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -40,7 +42,8 @@
 			  (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "uqadd<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "ssadd<mode>3"
   [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -48,15 +51,17 @@
 			 (match_operand:QADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "qadd<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "sub<mode>3"
-  [(set (match_operand:FIXED 0 "s_register_operand" "=r")
-	(minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
-		     (match_operand:FIXED 2 "s_register_operand" "r")))]
+  [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+	(minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+		     (match_operand:FIXED 2 "s_register_operand" "l,r")))]
   "TARGET_32BIT"
   "sub%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "yes,no")])
 
 (define_insn "sub<mode>3"
   [(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -64,7 +69,8 @@
 		      (match_operand:ADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "ssub<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "ussub<mode>3"
   [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -73,7 +79,8 @@
 	  (match_operand:UQADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "uqsub<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "sssub<mode>3"
   [(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -81,7 +88,8 @@
 			  (match_operand:QADDSUB 2 "s_register_operand" "r")))]
   "TARGET_INT_SIMD"
   "qsub<qaddsub_suf>%?\\t%0, %1, %2"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 ;; Fractional multiplies.
 
@@ -374,6 +382,7 @@
   "TARGET_32BIT && arm_arch6"
   "ssat%?\\t%0, #16, %2%S1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "insn" "sat")
    (set_attr "shift" "1")
    (set_attr "type" "alu_shift")])
@@ -384,4 +393,5 @@
   "TARGET_INT_SIMD"
   "usat%?\\t%0, #16, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "insn" "sat")])
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
index 2bc9702bee2..e615437b125 100644
--- a/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -146,12 +146,15 @@ let can_thumb addrmode update is_store =
   | IA, true, true -> true
   | _ -> false
 
+exception InvalidAddrMode of string;;
+
 let target addrmode thumb =
   match addrmode, thumb with
     IA, true -> "TARGET_THUMB1"
   | IA, false -> "TARGET_32BIT"
   | DB, false -> "TARGET_32BIT"
   | _, false -> "TARGET_ARM"
+  | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.")
 
 let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
   let astr = string_of_addrmode addrmode in
@@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
   done;
   Printf.printf "}\"\n";
   Printf.printf "  [(set_attr \"type\" \"%s%d\")" ls nregs;
-  begin if not thumb then
+  if not thumb then begin
     Printf.printf "\n   (set_attr \"predicable\" \"yes\")";
+    if addrmode == IA || addrmode == DB then
+      Printf.printf "\n   (set_attr \"predicable_short_it\" \"no\")";
   end;
   Printf.printf "])\n\n"
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 55a512349cc..6fc307e7709 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -662,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_ASAN_SHADOW_OFFSET
 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
 
+#undef MAX_INSN_PER_IT_BLOCK
+#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
+
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Obstack for minipool constant handling.  */
@@ -1871,6 +1875,11 @@ arm_option_override (void)
   arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
   arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
+  if (arm_restrict_it == 2)
+    arm_restrict_it = arm_arch8 && TARGET_THUMB2;
+
+  if (!TARGET_THUMB2)
+    arm_restrict_it = 0;
 
   /* If we are not using the default (ARM mode) section anchor offset
      ranges, then set the correct ranges now.  */
@@ -2677,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
   switch (code)
     {
     case AND:
+    case IOR:
+    case XOR:
       return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
               && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
     case PLUS:
@@ -16168,25 +16179,34 @@ arm_compute_save_reg0_reg12_mask (void)
   return save_reg_mask;
 }
 
+/* Return true if r3 is live at the start of the function.  */
+
+static bool
+arm_r3_live_at_start_p (void)
+{
+  /* Just look at cfg info, which is still close enough to correct at this
+     point.  This gives false positives for broken functions that might use
+     uninitialized data that happens to be allocated in r3, but who cares?  */
+  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3);
+}
 
 /* Compute the number of bytes used to store the static chain register on the
-   stack, above the stack frame. We need to know this accurately to get the
-   alignment of the rest of the stack frame correct. */
+   stack, above the stack frame.  We need to know this accurately to get the
+   alignment of the rest of the stack frame correct.  */
 
-static int arm_compute_static_chain_stack_bytes (void)
+static int
+arm_compute_static_chain_stack_bytes (void)
 {
-  unsigned long func_type = arm_current_func_type ();
-  int static_chain_stack_bytes = 0;
-
-  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
-      IS_NESTED (func_type) &&
-      df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
-    static_chain_stack_bytes = 4;
+  /* See the defining assertion in arm_expand_prologue.  */
+  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
+      && IS_NESTED (arm_current_func_type ())
+      && arm_r3_live_at_start_p ()
+      && crtl->args.pretend_args_size == 0)
+    return 4;
 
-  return static_chain_stack_bytes;
+  return 0;
 }
 
-
 /* Compute a bit mask of which registers need to be
    saved on the stack for the current function.
    This is used by arm_get_frame_offsets, which may add extra registers.  */
@@ -18141,16 +18161,16 @@ arm_expand_prologue (void)
 	}
       else if (IS_NESTED (func_type))
 	{
-	  /* The Static chain register is the same as the IP register
+	  /* The static chain register is the same as the IP register
 	     used as a scratch register during stack frame creation.
 	     To get around this need to find somewhere to store IP
 	     whilst the frame is being created.  We try the following
 	     places in order:
 
-	       1. The last argument register.
+	       1. The last argument register r3.
 	       2. A slot on the stack above the frame.  (This only
 	          works if the function is not a varargs function).
-	       3. Register r3, after pushing the argument registers
+	       3. Register r3 again, after pushing the argument registers
 	          onto the stack.
 
 	     Note - we only need to tell the dwarf2 backend about the SP
@@ -18158,7 +18178,7 @@ arm_expand_prologue (void)
 	     doesn't need to be unwound, as it doesn't contain a value
 	     inherited from the caller.  */
 
-	  if (df_regs_ever_live_p (3) == false)
+	  if (!arm_r3_live_at_start_p ())
 	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
 	  else if (args_to_push == 0)
 	    {
@@ -18299,8 +18319,7 @@ arm_expand_prologue (void)
 	  if (IS_NESTED (func_type))
 	    {
 	      /* Recover the static chain register.  */
-	      if (!df_regs_ever_live_p (3)
-		  || saved_pretend_args)
+	      if (!arm_r3_live_at_start_p () || saved_pretend_args)
 		insn = gen_rtx_REG (SImode, 3);
 	      else /* if (crtl->args.pretend_args_size == 0) */
 		{
@@ -19592,7 +19611,7 @@ thumb2_final_prescan_insn (rtx insn)
 	break;
       /* Allow up to 4 conditionally executed instructions in a block.  */
       n = get_attr_ce_count (insn);
-      if (arm_condexec_masklen + n > 4)
+      if (arm_condexec_masklen + n > MAX_INSN_PER_IT_BLOCK)
 	break;
 
       predicate = COND_EXEC_TEST (body);
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b7db3616cdf..3f0e021f3ed 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -93,6 +93,15 @@
 ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code.
 (define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code")))
 
+; We use this attribute to disable alternatives that can produce 32-bit
+; instructions inside an IT-block in Thumb2 state.  ARMv8 deprecates IT blocks
+; that contain 32-bit instructions.
+(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes"))
+
+; This attribute is used to disable a predicated alternative when we have
+; arm_restrict_it.
+(define_attr "predicable_short_it" "no,yes" (const_string "yes"))
+
 ;; Operand number of an input operand that is shifted.  Zero if the
 ;; given instruction does not shift one of its input operands.
 (define_attr "shift" "" (const_int 0))
@@ -103,6 +112,8 @@
 (define_attr "fpu" "none,vfp"
   (const (symbol_ref "arm_fpu_attr")))
 
+(define_attr "predicated" "yes,no" (const_string "no"))
+
 ; LENGTH of an instruction (in bytes)
 (define_attr "length" ""
   (const_int 4))
@@ -190,6 +201,15 @@
    (cond [(eq_attr "insn_enabled" "no")
 	  (const_string "no")
 
+	  (and (eq_attr "predicable_short_it" "no")
+	       (and (eq_attr "predicated" "yes")
+	            (match_test "arm_restrict_it")))
+	  (const_string "no")
+
+	  (and (eq_attr "enabled_for_depr_it" "no")
+	       (match_test "arm_restrict_it"))
+	  (const_string "no")
+
 	  (eq_attr "arch_enabled" "no")
 	  (const_string "no")
 
@@ -2163,29 +2183,28 @@
 )
 
 (define_insn_and_split "*anddi3_insn"
-  [(set (match_operand:DI         0 "s_register_operand"     "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
-	(and:DI (match_operand:DI 1 "s_register_operand"     "%0 ,r ,0,r ,w,0 ,0  ,r  ,w ,0")
-		(match_operand:DI 2 "arm_anddi_operand_neon" "r  ,r ,De,De,w,DL,r  ,r  ,w ,DL")))]
+  [(set (match_operand:DI         0 "s_register_operand"     "=w,w ,&r,&r,&r,&r,?w,?w")
+        (and:DI (match_operand:DI 1 "s_register_operand"     "%w,0 ,0 ,r ,0 ,r ,w ,0")
+                (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))]
   "TARGET_32BIT && !TARGET_IWMMXT"
 {
   switch (which_alternative)
     {
-    case 0:
-    case 1:
+    case 0: /* fall through */
+    case 6: return "vand\t%P0, %P1, %P2";
+    case 1: /* fall through */
+    case 7: return neon_output_logic_immediate ("vand", &operands[2],
+                    DImode, 1, VALID_NEON_QREG_MODE (DImode));
     case 2:
-    case 3: /* fall through */
-      return "#";
-    case 4: /* fall through */
-    case 8: return "vand\t%P0, %P1, %P2";
+    case 3:
+    case 4:
     case 5: /* fall through */
-    case 9: return neon_output_logic_immediate ("vand", &operands[2],
-                    DImode, 1, VALID_NEON_QREG_MODE (DImode));
-    case 6: return "#";
-    case 7: return "#";
+      return "#";
     default: gcc_unreachable ();
     }
 }
-  "TARGET_32BIT && !TARGET_IWMMXT"
+  "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+   && !(IS_VFP_REGNUM (REGNO (operands[0])))"
   [(set (match_dup 3) (match_dup 4))
    (set (match_dup 5) (match_dup 6))]
   "
@@ -2201,19 +2220,11 @@
                                            gen_highpart_mode (SImode, DImode, operands[2]));
 
   }"
-  [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
-   (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
+  [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+   (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,
                      avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
-   (set (attr "insn_enabled") (if_then_else
-                                (lt (symbol_ref "which_alternative")
-                                    (const_int 4))
-                                (if_then_else (match_test "!TARGET_NEON")
-                                              (const_string "yes")
-                                              (const_string "no"))
-                                (if_then_else (match_test "TARGET_NEON")
-                                              (const_string "yes")
-                                              (const_string "no"))))]
+   (set_attr "length" "*,*,8,8,8,8,*,*")
+  ]
 )
 
 (define_insn_and_split "*anddi_zesidi_di"
@@ -2997,14 +3008,47 @@
   ""
 )
 
-(define_insn "*iordi3_insn"
-  [(set (match_operand:DI         0 "s_register_operand" "=&r,&r")
-	(ior:DI (match_operand:DI 1 "s_register_operand"  "%0,r")
-		(match_operand:DI 2 "s_register_operand"   "r,r")))]
-  "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
-  "#"
-  [(set_attr "length" "8")
-   (set_attr "predicable" "yes")]
+(define_insn_and_split "*iordi3_insn"
+  [(set (match_operand:DI         0 "s_register_operand"     "=w,w ,&r,&r,&r,&r,?w,?w")
+	(ior:DI (match_operand:DI 1 "s_register_operand"     "%w,0 ,0 ,r ,0 ,r ,w ,0")
+		(match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))]
+  "TARGET_32BIT && !TARGET_IWMMXT"
+  {
+  switch (which_alternative)
+    {
+    case 0: /* fall through */
+    case 6: return "vorr\t%P0, %P1, %P2";
+    case 1: /* fall through */
+    case 7: return neon_output_logic_immediate ("vorr", &operands[2],
+		     DImode, 0, VALID_NEON_QREG_MODE (DImode));
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return "#";
+    default: gcc_unreachable ();
+    }
+  }
+  "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+   && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+  [(set (match_dup 3) (match_dup 4))
+   (set (match_dup 5) (match_dup 6))]
+  "
+  {
+    operands[3] = gen_lowpart (SImode, operands[0]);
+    operands[5] = gen_highpart (SImode, operands[0]);
+
+    operands[4] = simplify_gen_binary (IOR, SImode,
+                                           gen_lowpart (SImode, operands[1]),
+                                           gen_lowpart (SImode, operands[2]));
+    operands[6] = simplify_gen_binary (IOR, SImode,
+                                           gen_highpart (SImode, operands[1]),
+                                           gen_highpart_mode (SImode, DImode, operands[2]));
+
+  }"
+  [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+   (set_attr "length" "*,*,8,8,8,8,*,*")
+   (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
 )
 
 (define_insn "*iordi_zesidi_di"
@@ -3137,19 +3181,49 @@
 (define_expand "xordi3"
   [(set (match_operand:DI         0 "s_register_operand" "")
 	(xor:DI (match_operand:DI 1 "s_register_operand" "")
-		(match_operand:DI 2 "s_register_operand" "")))]
+		(match_operand:DI 2 "arm_xordi_operand" "")))]
   "TARGET_32BIT"
   ""
 )
 
-(define_insn "*xordi3_insn"
-  [(set (match_operand:DI         0 "s_register_operand" "=&r,&r")
-	(xor:DI (match_operand:DI 1 "s_register_operand"  "%0,r")
-		(match_operand:DI 2 "s_register_operand"   "r,r")))]
-  "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
-  "#"
-  [(set_attr "length" "8")
-   (set_attr "predicable" "yes")]
+(define_insn_and_split "*xordi3_insn"
+  [(set (match_operand:DI         0 "s_register_operand" "=w,&r,&r,&r,&r,?w")
+	(xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w")
+		(match_operand:DI 2 "arm_xordi_operand"  "w ,r ,r ,Dg,Dg,w")))]
+  "TARGET_32BIT && !TARGET_IWMMXT"
+{
+  switch (which_alternative)
+    {
+    case 1:
+    case 2:
+    case 3:
+    case 4:  /* fall through */
+      return "#";
+    case 0: /* fall through */
+    case 5: return "veor\t%P0, %P1, %P2";
+    default: gcc_unreachable ();
+    }
+}
+  "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+   && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+  [(set (match_dup 3) (match_dup 4))
+   (set (match_dup 5) (match_dup 6))]
+  "
+  {
+    operands[3] = gen_lowpart (SImode, operands[0]);
+    operands[5] = gen_highpart (SImode, operands[0]);
+
+    operands[4] = simplify_gen_binary (XOR, SImode,
+                                           gen_lowpart (SImode, operands[1]),
+                                           gen_lowpart (SImode, operands[2]));
+    operands[6] = simplify_gen_binary (XOR, SImode,
+                                           gen_highpart (SImode, operands[1]),
+                                           gen_highpart_mode (SImode, DImode, operands[2]));
+
+  }"
+  [(set_attr "length" "*,8,8,8,8,*")
+   (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1")
+   (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")]
 )
 
 (define_insn "*xordi_zesidi_di"
@@ -12097,6 +12171,7 @@
      (const_int 0)])]
   "TARGET_32BIT"
   ""
+[(set_attr "predicated" "yes")]
 )
 
 (define_insn "force_register_use"
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index afb42421c06..b9ae2b09682 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -239,6 +239,10 @@ mword-relocations
 Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
 Only generate absolute relocations on word sized values.
 
+mrestrict-it
+Target Report Var(arm_restrict_it) Init(2)
+Generate IT blocks appropriate for ARMv8.
+
 mfix-cortex-m3-ldrd
 Target Report Var(fix_cm3_ldrd) Init(2)
 Avoid overlapping destination and address registers on LDRD instructions
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7e7b3e69e0a..7cd8e31c97f 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -260,6 +260,18 @@
  (and (match_code "const_int")
       (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
 
+(define_constraint "Df"
+ "@internal
+  In ARM/Thumb-2 state a const_int that can be used by insn iordi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)")))
+
+(define_constraint "Dg"
+ "@internal
+  In ARM/Thumb-2 state a const_int that can be used by insn xordi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 8ebdfc81761..ad137d492e4 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -37,7 +37,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -74,7 +75,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm4_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -108,7 +110,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stm%(ia%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm4_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -125,7 +128,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_stm4_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -302,7 +306,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldm%(db%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*ldm4_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -323,7 +328,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "ldm%(db%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "load4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm4_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -338,7 +344,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stm%(db%)\t%5, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm4_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -355,7 +362,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
   "stm%(db%)\t%5!, {%1, %2, %3, %4}"
   [(set_attr "type" "store4")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
@@ -477,7 +485,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldm%(ia%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm3_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -508,7 +517,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldm%(ia%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm3_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -537,7 +547,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stm%(ia%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm3_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -552,7 +563,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stm%(ia%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_stm3_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -704,7 +716,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldm%(db%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*ldm3_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -722,7 +735,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "ldm%(db%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "load3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm3_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -735,7 +749,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stm%(db%)\t%4, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm3_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -750,7 +765,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
   "stm%(db%)\t%4!, {%1, %2, %3}"
   [(set_attr "type" "store3")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
@@ -855,7 +871,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "ldm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm2_ia"
   [(match_parallel 0 "load_multiple_operation"
@@ -880,7 +897,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldm%(ia%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_ldm2_ia_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -904,7 +922,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "stm%(ia%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm2_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -917,7 +936,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stm%(ia%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*thumb_stm2_ia_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -1044,7 +1064,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "ldm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*ldm2_db_update"
   [(match_parallel 0 "load_multiple_operation"
@@ -1059,7 +1080,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "ldm%(db%)\t%3!, {%1, %2}"
   [(set_attr "type" "load2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm2_db"
   [(match_parallel 0 "store_multiple_operation"
@@ -1070,7 +1092,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
   "stm%(db%)\t%3, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "*stm2_db_update"
   [(match_parallel 0 "store_multiple_operation"
@@ -1083,7 +1106,8 @@
   "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
   "stm%(db%)\t%3!, {%1, %2}"
   [(set_attr "type" "store2")
-   (set_attr "predicable" "yes")])
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_peephole2
   [(set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index f91a6f7d08b..e814df0d264 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -679,29 +679,6 @@
   [(set_attr "neon_type" "neon_int_1")]
 )
 
-(define_insn "iordi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
-        (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
-		(match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))]
-  "TARGET_NEON"
-{
-  switch (which_alternative)
-    {
-    case 0: /* fall through */
-    case 4: return "vorr\t%P0, %P1, %P2";
-    case 1: /* fall through */
-    case 5: return neon_output_logic_immediate ("vorr", &operands[2],
-		     DImode, 0, VALID_NEON_QREG_MODE (DImode));
-    case 2: return "#";
-    case 3: return "#";
-    default: gcc_unreachable ();
-    }
-}
-  [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
-   (set_attr "length" "*,*,8,8,*,*")
-   (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
-)
-
 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
 ;; vorr. We support the pseudo-instruction vand instead, because that
 ;; corresponds to the canonical form the middle-end expects to use for
@@ -805,21 +782,6 @@
   [(set_attr "neon_type" "neon_int_1")]
 )
 
-(define_insn "xordi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w")
-	        (match_operand:DI 2 "s_register_operand" "w,r,r,w")))]
-  "TARGET_NEON"
-  "@
-   veor\t%P0, %P1, %P2
-   #
-   #
-   veor\t%P0, %P1, %P2"
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
-)
-
 (define_insn "one_cmpl<mode>2"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
@@ -5617,7 +5579,7 @@
    (match_operand:SI 3 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+  emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
   DONE;
 })
 
@@ -5628,7 +5590,7 @@
    (match_operand:SI 3 "immediate_operand" "")]
   "TARGET_NEON"
 {
-  emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+  emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
   DONE;
 })
 
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92de9fe8bd9..d169cb27035 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -42,6 +42,17 @@
   (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
        (match_operand 0 "s_register_operand")))
 
+(define_predicate "imm_for_neon_logic_operand"
+  (match_code "const_vector")
+{
+  return (TARGET_NEON
+          && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
+})
+
+(define_predicate "neon_logic_op2"
+  (ior (match_operand 0 "imm_for_neon_logic_operand")
+       (match_operand 0 "s_register_operand")))
+
 ;; Any hard register.
 (define_predicate "arm_hard_register_operand"
   (match_code "reg")
@@ -162,6 +173,17 @@
 	    (match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
        (match_operand 0 "neon_inv_logic_op2")))
 
+(define_predicate "arm_iordi_operand_neon"
+  (ior (match_operand 0 "s_register_operand")
+       (and (match_code "const_int")
+	    (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)"))
+       (match_operand 0 "neon_logic_op2")))
+
+(define_predicate "arm_xordi_operand"
+  (ior (match_operand 0 "s_register_operand")
+       (and (match_code "const_int")
+	    (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)"))))
+
 (define_predicate "arm_adddi_operand"
   (ior (match_operand 0 "s_register_operand")
        (and (match_code "const_int")
@@ -535,17 +557,6 @@
   (ior (match_operand 0 "s_register_operand")
        (match_operand 0 "imm_for_neon_rshift_operand")))
 
-(define_predicate "imm_for_neon_logic_operand"
-  (match_code "const_vector")
-{
-  return (TARGET_NEON
-          && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
-})
-
-(define_predicate "neon_logic_op2"
-  (ior (match_operand 0 "imm_for_neon_logic_operand")
-       (match_operand 0 "s_register_operand")))
-
 ;; Predicates for named expanders that overlap multiple ISAs.
 
 (define_predicate "cmpdi_operand"
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 980234836c9..8f7bd71c317 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -124,7 +124,8 @@
 		   UNSPEC_LL))]
   "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN"
   "ldrexd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "s_register_operand" "")		;; bool out
@@ -361,7 +362,8 @@
 	    VUNSPEC_LL)))]
   "TARGET_HAVE_LDREXBH"
   "ldrex<sync_sfx>%?\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_load_acquire_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -371,7 +373,8 @@
 	    VUNSPEC_LAX)))]
   "TARGET_HAVE_LDACQ"
   "ldaex<sync_sfx>%?\\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_load_exclusivesi"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -380,7 +383,8 @@
 	  VUNSPEC_LL))]
   "TARGET_HAVE_LDREX"
   "ldrex%?\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_load_acquire_exclusivesi"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -389,7 +393,8 @@
 	  VUNSPEC_LAX))]
   "TARGET_HAVE_LDACQ"
   "ldaex%?\t%0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_load_exclusivedi"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -398,7 +403,8 @@
 	  VUNSPEC_LL))]
   "TARGET_HAVE_LDREXD"
   "ldrexd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_load_acquire_exclusivedi"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -407,7 +413,8 @@
 	  VUNSPEC_LAX))]
   "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
   "ldaexd%?\t%0, %H0, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_store_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -431,7 +438,8 @@
       }
     return "strex<sync_sfx>%?\t%0, %2, %C1";
   }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_store_release_exclusivedi"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -448,7 +456,8 @@
     operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
     return "stlexd%?\t%0, %2, %3, %C1";
   }
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
 
 (define_insn "arm_store_release_exclusive<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -459,4 +468,5 @@
 	  VUNSPEC_SLX))]
   "TARGET_HAVE_LDACQ"
   "stlex<sync_sfx>%?\t%0, %2, %C1"
-  [(set_attr "predicable" "yes")])
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")])
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index ce331cbe363..c30a9718e76 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch;
    Really only externally visible arrays must be aligned this way, as
    only those are directly visible from another compilation unit.  But
    we don't have that information available here.  */
-#define DATA_ALIGNMENT(TYPE, ALIGN)					\
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN)					\
   (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE)	\
    ? BITS_PER_UNIT * 8 : (ALIGN))
 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ef4dc761d5a..f228e87a2e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -207,7 +207,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif	/* RTX_CODE  */
 
 #ifdef TREE_CODE
-extern int ix86_data_alignment (tree, int);
+extern int ix86_data_alignment (tree, int, bool);
 extern unsigned int ix86_local_alignment (tree, enum machine_mode,
 					  unsigned int);
 extern unsigned int ix86_minimum_alignment (tree, enum machine_mode,
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e8f47c9d417..45e88996ad2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2108,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
 
   /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
      a conditional move.  */
-  m_ATOM
+  m_ATOM,
+
+  /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
+     fp converts to destination register.  */
+  m_SLM
+
 };
 
 /* Feature tests against the various architecture variations.  */
@@ -17392,10 +17397,24 @@ distance_agu_use (unsigned int regno0, rtx insn)
 
 static bool
 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
-		      unsigned int regno2, int split_cost)
+		      unsigned int regno2, int split_cost, bool has_scale)
 {
   int dist_define, dist_use;
 
+  /* For Silvermont if using a 2-source or 3-source LEA for
+     non-destructive destination purposes, or due to wanting
+     ability to use SCALE, the use of LEA is justified.  */
+  if (ix86_tune == PROCESSOR_SLM)
+    {
+      if (has_scale)
+	return true;
+      if (split_cost < 1)
+	return false;
+      if (regno0 == regno1 || regno0 == regno2)
+	return false;
+      return true;
+    }
+
   dist_define = distance_non_agu_define (regno1, regno2, insn);
   dist_use = distance_agu_use (regno0, insn);
 
@@ -17484,7 +17503,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[])
   if (regno0 == regno1 || regno0 == regno2)
     return false;
   else
-    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
+    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
 }
 
 /* Return true if we should emit lea instruction instead of mov
@@ -17506,7 +17525,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[])
   regno0 = true_regnum (operands[0]);
   regno1 = true_regnum (operands[1]);
 
-  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
+  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
 }
 
 /* Return true if we need to split lea into a sequence of
@@ -17585,7 +17604,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
       split_cost -= 1;
     }
 
-  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
+  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
+				parts.scale > 1);
 }
 
 /* Emit x86 binary operand CODE in mode MODE, where the first operand
@@ -17770,7 +17790,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[])
   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
     return false;
 
-  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
+  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
 }
 
 /* Return true if destination reg of SET_BODY is shift count of
@@ -24368,6 +24388,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn)
   return false;
 }
 
+/* Helper function for exact_store_load_dependency.
+   Return true if addr is found in insn.  */
+static bool
+exact_dependency_1 (rtx addr, rtx insn)
+{
+  enum rtx_code code;
+  const char *format_ptr;
+  int i, j;
+
+  code = GET_CODE (insn);
+  switch (code)
+    {
+    case MEM:
+      if (rtx_equal_p (addr, insn))
+	return true;
+      break;
+    case REG:
+    CASE_CONST_ANY:
+    case SYMBOL_REF:
+    case CODE_LABEL:
+    case PC:
+    case CC0:
+    case EXPR_LIST:
+      return false;
+    default:
+      break;
+    }
+
+  format_ptr = GET_RTX_FORMAT (code);
+  for (i = 0; i < GET_RTX_LENGTH (code); i++)
+    {
+      switch (*format_ptr++)
+	{
+	case 'e':
+	  if (exact_dependency_1 (addr, XEXP (insn, i)))
+	    return true;
+	  break;
+	case 'E':
+	  for (j = 0; j < XVECLEN (insn, i); j++)
+	    if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
+	      return true;
+	    break;
+	}
+    }
+  return false;
+}
+
+/* Return true if there exists exact dependency for store & load, i.e.
+   the same memory address is used in them.  */
+static bool
+exact_store_load_dependency (rtx store, rtx load)
+{
+  rtx set1, set2;
+
+  set1 = single_set (store);
+  if (!set1)
+    return false;
+  if (!MEM_P (SET_DEST (set1)))
+    return false;
+  set2 = single_set (load);
+  if (!set2)
+    return false;
+  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
+    return true;
+  return false;
+}
+
 static int
 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
 {
@@ -24519,6 +24606,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
 	  else
 	    cost = 0;
 	}
+      break;
+
+    case PROCESSOR_SLM:
+      if (!reload_completed)
+	return cost;
+
+      /* Increase cost of integer loads.  */
+      memory = get_attr_memory (dep_insn);
+      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+	{
+	  enum attr_unit unit = get_attr_unit (dep_insn);
+	  if (unit == UNIT_INTEGER && cost == 1)
+	    {
+	      if (memory == MEMORY_LOAD)
+		cost = 3;
+	      else
+		{
+		  /* Increase cost of ld/st for short int types only
+		     because of store forwarding issue.  */
+		  rtx set = single_set (dep_insn);
+		  if (set && (GET_MODE (SET_DEST (set)) == QImode
+			      || GET_MODE (SET_DEST (set)) == HImode))
+		    {
+		      /* Increase cost of store/load insn if exact
+			 dependence exists and it is load insn.  */
+		      enum attr_memory insn_memory = get_attr_memory (insn);
+		      if (insn_memory == MEMORY_LOAD
+			  && exact_store_load_dependency (dep_insn, insn))
+			cost = 3;
+		    }
+		}
+	    }
+	}
 
     default:
       break;
@@ -24565,110 +24685,204 @@ ia32_multipass_dfa_lookahead (void)
    execution. It is applied if
    (1) IMUL instruction is on the top of list;
    (2) There exists the only producer of independent IMUL instruction in
-       ready list;
-   (3) Put found producer on the top of ready list.
-   Returns issue rate.  */
-
+       ready list.
+   Return index of IMUL producer if it was found and -1 otherwise.  */
 static int
-ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
-                   int clock_var ATTRIBUTE_UNUSED)
+do_reorder_for_imul (rtx *ready, int n_ready)
 {
-  static int issue_rate = -1;
-  int n_ready = *pn_ready;
-  rtx insn, insn1, insn2;
-  int i;
+  rtx insn, set, insn1, insn2;
   sd_iterator_def sd_it;
   dep_t dep;
   int index = -1;
+  int i;
 
-  /* Set up issue rate.  */
-  issue_rate = ix86_issue_rate();
-
-  /* Do reodering for Atom only.  */
   if (ix86_tune != PROCESSOR_ATOM)
-    return issue_rate;
-  /* Do not perform ready list reodering for pre-reload schedule pass.  */
-  if (!reload_completed)
-    return issue_rate;
-  /* Nothing to do if ready list contains only 1 instruction.  */
-  if (n_ready <= 1)
-    return issue_rate;
+    return index;
 
   /* Check that IMUL instruction is on the top of ready list.  */
   insn = ready[n_ready - 1];
-  if (!NONDEBUG_INSN_P (insn))
-    return issue_rate;
-  insn = PATTERN (insn);
-  if (GET_CODE (insn) == PARALLEL)
-    insn = XVECEXP (insn, 0, 0);
-  if (GET_CODE (insn) != SET)
-    return issue_rate;
-  if (!(GET_CODE (SET_SRC (insn)) == MULT
-      && GET_MODE (SET_SRC (insn)) == SImode))
-    return issue_rate;
+  set = single_set (insn);
+  if (!set)
+    return index;
+  if (!(GET_CODE (SET_SRC (set)) == MULT
+      && GET_MODE (SET_SRC (set)) == SImode))
+    return index;
 
   /* Search for producer of independent IMUL instruction.  */
-  for (i = n_ready - 2; i>= 0; i--)
+  for (i = n_ready - 2; i >= 0; i--)
     {
       insn = ready[i];
       if (!NONDEBUG_INSN_P (insn))
-        continue;
+	continue;
       /* Skip IMUL instruction.  */
       insn2 = PATTERN (insn);
       if (GET_CODE (insn2) == PARALLEL)
-        insn2 = XVECEXP (insn2, 0, 0);
+	insn2 = XVECEXP (insn2, 0, 0);
       if (GET_CODE (insn2) == SET
-          && GET_CODE (SET_SRC (insn2)) == MULT
-          && GET_MODE (SET_SRC (insn2)) == SImode)
-        continue;
+	  && GET_CODE (SET_SRC (insn2)) == MULT
+	  && GET_MODE (SET_SRC (insn2)) == SImode)
+	continue;
 
       FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
-        {
-          rtx con;
+	{
+	  rtx con;
 	  con = DEP_CON (dep);
 	  if (!NONDEBUG_INSN_P (con))
 	    continue;
-          insn1 = PATTERN (con);
-          if (GET_CODE (insn1) == PARALLEL)
-            insn1 = XVECEXP (insn1, 0, 0);
-
-          if (GET_CODE (insn1) == SET
-              && GET_CODE (SET_SRC (insn1)) == MULT
-              && GET_MODE (SET_SRC (insn1)) == SImode)
-            {
-              sd_iterator_def sd_it1;
-              dep_t dep1;
-              /* Check if there is no other dependee for IMUL.  */
-              index = i;
-              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
-                {
-                  rtx pro;
-                  pro = DEP_PRO (dep1);
+	  insn1 = PATTERN (con);
+	  if (GET_CODE (insn1) == PARALLEL)
+	    insn1 = XVECEXP (insn1, 0, 0);
+
+	  if (GET_CODE (insn1) == SET
+	      && GET_CODE (SET_SRC (insn1)) == MULT
+	      && GET_MODE (SET_SRC (insn1)) == SImode)
+	    {
+	      sd_iterator_def sd_it1;
+	      dep_t dep1;
+	      /* Check if there is no other dependee for IMUL.  */
+	      index = i;
+	      FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+		{
+		  rtx pro;
+		  pro = DEP_PRO (dep1);
 		  if (!NONDEBUG_INSN_P (pro))
 		    continue;
-                  if (pro != insn)
-                    index = -1;
-                }
-              if (index >= 0)
-                break;
-            }
-        }
+		  if (pro != insn)
+		    index = -1;
+		}
+	      if (index >= 0)
+		break;
+	    }
+	}
       if (index >= 0)
-        break;
+	break;
     }
-  if (index < 0)
-    return issue_rate; /* Didn't find IMUL producer.  */
+  return index;
+}
 
-  if (sched_verbose > 1)
-    fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
-            INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
+/* Try to find the best candidate on the top of ready list if two insns
+   have the same priority - candidate is best if its dependees were
+   scheduled earlier. Applied for Silvermont only.
+   Return true if top 2 insns must be interchanged.  */
+static bool
+swap_top_of_ready_list (rtx *ready, int n_ready)
+{
+  rtx top = ready[n_ready - 1];
+  rtx next = ready[n_ready - 2];
+  rtx set;
+  sd_iterator_def sd_it;
+  dep_t dep;
+  int clock1 = -1;
+  int clock2 = -1;
+  #define INSN_TICK(INSN) (HID (INSN)->tick)
 
-  /* Put IMUL producer (ready[index]) at the top of ready list.  */
-  insn1= ready[index];
-  for (i = index; i < n_ready - 1; i++)
-    ready[i] = ready[i + 1];
-  ready[n_ready - 1] = insn1;
+  if (ix86_tune != PROCESSOR_SLM)
+    return false;
+
+  if (!NONDEBUG_INSN_P (top))
+    return false;
+  if (!NONJUMP_INSN_P (top))
+    return false;
+  if (!NONDEBUG_INSN_P (next))
+    return false;
+  if (!NONJUMP_INSN_P (next))
+    return false;
+  set = single_set (top);
+  if (!set)
+    return false;
+  set = single_set (next);
+  if (!set)
+    return false;
 
+  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
+    {
+      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
+	return false;
+      /* Determine winner more precise.  */
+      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
+	{
+	  rtx pro;
+	  pro = DEP_PRO (dep);
+	  if (!NONDEBUG_INSN_P (pro))
+	    continue;
+	  if (INSN_TICK (pro) > clock1)
+	    clock1 = INSN_TICK (pro);
+	}
+      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
+	{
+	  rtx pro;
+	  pro = DEP_PRO (dep);
+	  if (!NONDEBUG_INSN_P (pro))
+	    continue;
+	  if (INSN_TICK (pro) > clock2)
+	    clock2 = INSN_TICK (pro);
+	}
+
+      if (clock1 == clock2)
+	{
+	  /* Determine winner - load must win. */
+	  enum attr_memory memory1, memory2;
+	  memory1 = get_attr_memory (top);
+	  memory2 = get_attr_memory (next);
+	  if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
+	    return true;
+	}
+	return (bool) (clock2 < clock1);
+    }
+  return false;
+  #undef INSN_TICK
+}
+
+/* Perform possible reodering of ready list for Atom/Silvermont only.
+   Return issue rate.  */
+static int
+ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+		   int clock_var)
+{
+  int issue_rate = -1;
+  int n_ready = *pn_ready;
+  int i;
+  rtx insn;
+  int index = -1;
+
+  /* Set up issue rate.  */
+  issue_rate = ix86_issue_rate ();
+
+  /* Do reodering for Atom/SLM only.  */
+  if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
+    return issue_rate;
+
+  /* Nothing to do if ready list contains only 1 instruction.  */
+  if (n_ready <= 1)
+    return issue_rate;
+
+  /* Do reodering for post-reload scheduler only.  */
+  if (!reload_completed)
+    return issue_rate;
+
+  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
+    {
+      if (sched_verbose > 1)
+	fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
+		 INSN_UID (ready[index]));
+
+      /* Put IMUL producer (ready[index]) at the top of ready list.  */
+      insn = ready[index];
+      for (i = index; i < n_ready - 1; i++)
+	ready[i] = ready[i + 1];
+      ready[n_ready - 1] = insn;
+      return issue_rate;
+    }
+  if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
+    {
+      if (sched_verbose > 1)
+	fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
+		 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
+      /* Swap 2 top elements of ready list.  */
+      insn = ready[n_ready - 1];
+      ready[n_ready - 1] = ready[n_ready - 2];
+      ready[n_ready - 2] = insn;
+    }
   return issue_rate;
 }
 
@@ -25161,11 +25375,12 @@ ix86_constant_alignment (tree exp, int align)
    instead of that alignment to align the object.  */
 
 int
-ix86_data_alignment (tree type, int align)
+ix86_data_alignment (tree type, int align, bool opt)
 {
   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
 
-  if (AGGREGATE_TYPE_P (type)
+  if (opt
+      && AGGREGATE_TYPE_P (type)
       && TYPE_SIZE (type)
       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
@@ -25177,14 +25392,17 @@ ix86_data_alignment (tree type, int align)
      to 16byte boundary.  */
   if (TARGET_64BIT)
     {
-      if (AGGREGATE_TYPE_P (type)
-	   && TYPE_SIZE (type)
-	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
-	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
-	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
+	  && TYPE_SIZE (type)
+	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
 	return 128;
     }
 
+  if (!opt)
+    return align;
+
   if (TREE_CODE (type) == ARRAY_TYPE)
     {
       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
@@ -29834,11 +30052,11 @@ fold_builtin_cpu (tree fndecl, tree *args)
     M_AMD,
     M_CPU_TYPE_START,
     M_INTEL_ATOM,
-    M_INTEL_SLM,
     M_INTEL_CORE2,
     M_INTEL_COREI7,
     M_AMDFAM10H,
     M_AMDFAM15H,
+    M_INTEL_SLM,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
@@ -33737,6 +33955,8 @@ static inline bool
 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
 				enum machine_mode mode, int strict)
 {
+  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
+    return false;
   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 776582a66de..7d940f98804 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -333,6 +333,7 @@ enum ix86_tune_indices {
   X86_TUNE_REASSOC_FP_TO_PARALLEL,
   X86_TUNE_GENERAL_REGS_SSE_SPILL,
   X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
+  X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
 
   X86_TUNE_LAST
 };
@@ -443,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
 #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
 	ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
+#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
+	ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
@@ -856,7 +859,18 @@ enum target_cpu_default
    cause character arrays to be word-aligned so that `strcpy' calls
    that copy constants to character arrays can be done inline.  */
 
-#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+  ix86_data_alignment ((TYPE), (ALIGN), true)
+
+/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates
+   some alignment increase, instead of optimization only purposes.  E.g.
+   AMD x86-64 psABI says that variables with array type larger than 15 bytes
+   must be aligned to 16 byte boundaries.
+
+   If this macro is not defined, then ALIGN is used.  */
+
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
+  ix86_data_alignment ((TYPE), (ALIGN), false)
 
 /* If defined, a C expression to compute the alignment for a local
    variable.  TYPE is the data type, and ALIGN is the alignment that
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 28b0c78093a..e97a4570501 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3625,6 +3625,18 @@
 				  CONST0_RTX (V4SFmode), operands[1]));
 })
 
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+  [(set (match_operand:DF 0 "register_operand")
+	(float_extend:DF
+	  (match_operand:SF 1 "memory_operand")))]
+  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && SSE_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
 (define_insn "*extendsfdf2_mixed"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
         (float_extend:DF
@@ -3766,6 +3778,18 @@
 				 CONST0_RTX (V2DFmode), operands[1]));
 })
 
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+  [(set (match_operand:SF 0 "register_operand")
+	(float_truncate:SF
+	  (match_operand:DF 1 "memory_operand")))]
+  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && SSE_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
 (define_expand "truncdfsf2_with_temp"
   [(parallel [(set (match_operand:SF 0)
 		   (float_truncate:SF (match_operand:DF 1)))
@@ -16567,6 +16591,7 @@
   "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    && peep2_reg_dead_p (4, operands[0])
    && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
    && (<MODE>mode != QImode
        || immediate_operand (operands[2], QImode)
        || q_regs_operand (operands[2], QImode))
@@ -16631,6 +16656,7 @@
        || immediate_operand (operands[2], SImode)
        || q_regs_operand (operands[2], SImode))
    && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
    && ix86_match_ccmode (peep2_next_insn (3),
 			 (GET_CODE (operands[3]) == PLUS
 			  || GET_CODE (operands[3]) == MINUS)
diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def
index f08fad6778d..d7db0ba96d6 100644
--- a/gcc/config/mips/mips-cpus.def
+++ b/gcc/config/mips/mips-cpus.def
@@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0)
 MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0)
 MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0)
 MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0)
+MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0)
 /* ST Loongson 2E/2F processors.  */
 MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY)
 MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY)
diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md
index 002c9992001..49a08689638 100644
--- a/gcc/config/mips/mips-dsp.md
+++ b/gcc/config/mips/mips-dsp.md
@@ -1131,8 +1131,7 @@
   "ISA_HAS_L<SHORT:SIZE><U>X"
   "l<SHORT:size><u>x\t%0,%2(%1)"
   [(set_attr "type"	"load")
-   (set_attr "mode"	"<GPR:MODE>")
-   (set_attr "length"	"4")])
+   (set_attr "mode"	"<GPR:MODE>")])
 
 (define_expand "mips_lhx"
   [(match_operand:SI 0 "register_operand")
@@ -1165,8 +1164,7 @@
   "ISA_HAS_L<GPR:SIZE>X"
   "l<GPR:size>x\t%0,%2(%1)"
   [(set_attr "type"	"load")
-   (set_attr "mode"	"<GPR:MODE>")
-   (set_attr "length"	"4")])
+   (set_attr "mode"	"<GPR:MODE>")])
 
 (define_insn "*mips_lw<u>x_<P:mode>_ext"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -1176,8 +1174,7 @@
   "ISA_HAS_LW<U>X && TARGET_64BIT"
   "lw<u>x\t%0,%2(%1)"
   [(set_attr "type"	"load")
-   (set_attr "mode"	"DI")
-   (set_attr "length"	"4")])
+   (set_attr "mode"	"DI")])
 
 ;; Table 2-8. MIPS DSP ASE Instructions: Branch
 ;; BPOSGE32
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 9c70cc4324f..a22c7829b77 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -481,7 +481,7 @@
   operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
 }
   [(set_attr "type" "fcmp")
-   (set_attr "length" "8")
+   (set_attr "insn_count" "2")
    (set_attr "mode" "FPSW")])
 
 (define_insn_and_split "mips_cabs_cond_4s"
@@ -510,7 +510,7 @@
   operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
 }
   [(set_attr "type" "fcmp")
-   (set_attr "length" "8")
+   (set_attr "insn_count" "2")
    (set_attr "mode" "FPSW")])
 
 
diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt
index 0d7fa26510d..8ed412cc15e 100644
--- a/gcc/config/mips/mips-tables.opt
+++ b/gcc/config/mips/mips-tables.opt
@@ -208,425 +208,431 @@ EnumValue
 Enum(mips_arch_opt_value) String(4700) Value(22)
 
 EnumValue
-Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical
+Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical
+Enum(mips_arch_opt_value) String(5900) Value(23)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical
+Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r8k) Value(25)
+Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(8000) Value(25)
+Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(8k) Value(25)
+Enum(mips_arch_opt_value) String(r8k) Value(26)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical
+Enum(mips_arch_opt_value) String(8000) Value(26)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r10k) Value(26)
+Enum(mips_arch_opt_value) String(8k) Value(26)
 
 EnumValue
-Enum(mips_arch_opt_value) String(10000) Value(26)
+Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(10k) Value(26)
+Enum(mips_arch_opt_value) String(r10k) Value(27)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical
+Enum(mips_arch_opt_value) String(10000) Value(27)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r12k) Value(27)
+Enum(mips_arch_opt_value) String(10k) Value(27)
 
 EnumValue
-Enum(mips_arch_opt_value) String(12000) Value(27)
+Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(12k) Value(27)
+Enum(mips_arch_opt_value) String(r12k) Value(28)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical
+Enum(mips_arch_opt_value) String(12000) Value(28)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r14k) Value(28)
+Enum(mips_arch_opt_value) String(12k) Value(28)
 
 EnumValue
-Enum(mips_arch_opt_value) String(14000) Value(28)
+Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(14k) Value(28)
+Enum(mips_arch_opt_value) String(r14k) Value(29)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical
+Enum(mips_arch_opt_value) String(14000) Value(29)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r16k) Value(29)
+Enum(mips_arch_opt_value) String(14k) Value(29)
 
 EnumValue
-Enum(mips_arch_opt_value) String(16000) Value(29)
+Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(16k) Value(29)
+Enum(mips_arch_opt_value) String(r16k) Value(30)
 
 EnumValue
-Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical
+Enum(mips_arch_opt_value) String(16000) Value(30)
 
 EnumValue
-Enum(mips_arch_opt_value) String(vr5k) Value(30)
+Enum(mips_arch_opt_value) String(16k) Value(30)
 
 EnumValue
-Enum(mips_arch_opt_value) String(5000) Value(30)
+Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(5k) Value(30)
+Enum(mips_arch_opt_value) String(vr5k) Value(31)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5000) Value(30)
+Enum(mips_arch_opt_value) String(5000) Value(31)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5k) Value(30)
+Enum(mips_arch_opt_value) String(5k) Value(31)
 
 EnumValue
-Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical
+Enum(mips_arch_opt_value) String(r5000) Value(31)
 
 EnumValue
-Enum(mips_arch_opt_value) String(5400) Value(31)
+Enum(mips_arch_opt_value) String(r5k) Value(31)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5400) Value(31)
+Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical
+Enum(mips_arch_opt_value) String(5400) Value(32)
 
 EnumValue
-Enum(mips_arch_opt_value) String(5500) Value(32)
+Enum(mips_arch_opt_value) String(r5400) Value(32)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5500) Value(32)
+Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical
+Enum(mips_arch_opt_value) String(5500) Value(33)
 
 EnumValue
-Enum(mips_arch_opt_value) String(rm7k) Value(33)
+Enum(mips_arch_opt_value) String(r5500) Value(33)
 
 EnumValue
-Enum(mips_arch_opt_value) String(7000) Value(33)
+Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(7k) Value(33)
+Enum(mips_arch_opt_value) String(rm7k) Value(34)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r7000) Value(33)
+Enum(mips_arch_opt_value) String(7000) Value(34)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r7k) Value(33)
+Enum(mips_arch_opt_value) String(7k) Value(34)
 
 EnumValue
-Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical
+Enum(mips_arch_opt_value) String(r7000) Value(34)
 
 EnumValue
-Enum(mips_arch_opt_value) String(rm9k) Value(34)
+Enum(mips_arch_opt_value) String(r7k) Value(34)
 
 EnumValue
-Enum(mips_arch_opt_value) String(9000) Value(34)
+Enum(mips_arch_opt_value) String(rm9000) Value(35) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(9k) Value(34)
+Enum(mips_arch_opt_value) String(rm9k) Value(35)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r9000) Value(34)
+Enum(mips_arch_opt_value) String(9000) Value(35)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r9k) Value(34)
+Enum(mips_arch_opt_value) String(9k) Value(35)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical
+Enum(mips_arch_opt_value) String(r9000) Value(35)
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4kc) Value(35)
+Enum(mips_arch_opt_value) String(r9k) Value(35)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4km) Value(36) Canonical
+Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4km) Value(36)
+Enum(mips_arch_opt_value) String(r4kc) Value(36)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical
+Enum(mips_arch_opt_value) String(4km) Value(37) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4kp) Value(37)
+Enum(mips_arch_opt_value) String(r4km) Value(37)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical
+Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4ksc) Value(38)
+Enum(mips_arch_opt_value) String(r4kp) Value(38)
 
 EnumValue
-Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical
+Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical
+Enum(mips_arch_opt_value) String(r4ksc) Value(39)
 
 EnumValue
-Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical
+Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical
+Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4kec) Value(42)
+Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical
+Enum(mips_arch_opt_value) String(4kec) Value(43) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4kem) Value(43)
+Enum(mips_arch_opt_value) String(r4kec) Value(43)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical
+Enum(mips_arch_opt_value) String(4kem) Value(44) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4kep) Value(44)
+Enum(mips_arch_opt_value) String(r4kem) Value(44)
 
 EnumValue
-Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical
+Enum(mips_arch_opt_value) String(4kep) Value(45) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r4ksd) Value(45)
+Enum(mips_arch_opt_value) String(r4kep) Value(45)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical
+Enum(mips_arch_opt_value) String(4ksd) Value(46) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kc) Value(46)
+Enum(mips_arch_opt_value) String(r4ksd) Value(46)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical
+Enum(mips_arch_opt_value) String(24kc) Value(47) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kf2_1) Value(47)
+Enum(mips_arch_opt_value) String(r24kc) Value(47)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical
+Enum(mips_arch_opt_value) String(24kf2_1) Value(48) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kf) Value(48)
+Enum(mips_arch_opt_value) String(r24kf2_1) Value(48)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical
+Enum(mips_arch_opt_value) String(24kf) Value(49) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kf1_1) Value(49)
+Enum(mips_arch_opt_value) String(r24kf) Value(49)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical
+Enum(mips_arch_opt_value) String(24kf1_1) Value(50) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kfx) Value(50)
+Enum(mips_arch_opt_value) String(r24kf1_1) Value(50)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical
+Enum(mips_arch_opt_value) String(24kfx) Value(51) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kx) Value(51)
+Enum(mips_arch_opt_value) String(r24kfx) Value(51)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical
+Enum(mips_arch_opt_value) String(24kx) Value(52) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kec) Value(52)
+Enum(mips_arch_opt_value) String(r24kx) Value(52)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical
+Enum(mips_arch_opt_value) String(24kec) Value(53) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kef2_1) Value(53)
+Enum(mips_arch_opt_value) String(r24kec) Value(53)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kef) Value(54) Canonical
+Enum(mips_arch_opt_value) String(24kef2_1) Value(54) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kef) Value(54)
+Enum(mips_arch_opt_value) String(r24kef2_1) Value(54)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical
+Enum(mips_arch_opt_value) String(24kef) Value(55) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kef1_1) Value(55)
+Enum(mips_arch_opt_value) String(r24kef) Value(55)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical
+Enum(mips_arch_opt_value) String(24kef1_1) Value(56) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kefx) Value(56)
+Enum(mips_arch_opt_value) String(r24kef1_1) Value(56)
 
 EnumValue
-Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical
+Enum(mips_arch_opt_value) String(24kefx) Value(57) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r24kex) Value(57)
+Enum(mips_arch_opt_value) String(r24kefx) Value(57)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical
+Enum(mips_arch_opt_value) String(24kex) Value(58) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kc) Value(58)
+Enum(mips_arch_opt_value) String(r24kex) Value(58)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical
+Enum(mips_arch_opt_value) String(34kc) Value(59) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kf2_1) Value(59)
+Enum(mips_arch_opt_value) String(r34kc) Value(59)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical
+Enum(mips_arch_opt_value) String(34kf2_1) Value(60) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kf) Value(60)
+Enum(mips_arch_opt_value) String(r34kf2_1) Value(60)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical
+Enum(mips_arch_opt_value) String(34kf) Value(61) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kf1_1) Value(61)
+Enum(mips_arch_opt_value) String(r34kf) Value(61)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical
+Enum(mips_arch_opt_value) String(34kf1_1) Value(62) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kfx) Value(62)
+Enum(mips_arch_opt_value) String(r34kf1_1) Value(62)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical
+Enum(mips_arch_opt_value) String(34kfx) Value(63) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kx) Value(63)
+Enum(mips_arch_opt_value) String(r34kfx) Value(63)
 
 EnumValue
-Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical
+Enum(mips_arch_opt_value) String(34kx) Value(64) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r34kn) Value(64)
+Enum(mips_arch_opt_value) String(r34kx) Value(64)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical
+Enum(mips_arch_opt_value) String(34kn) Value(65) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kc) Value(65)
+Enum(mips_arch_opt_value) String(r34kn) Value(65)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical
+Enum(mips_arch_opt_value) String(74kc) Value(66) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kf2_1) Value(66)
+Enum(mips_arch_opt_value) String(r74kc) Value(66)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical
+Enum(mips_arch_opt_value) String(74kf2_1) Value(67) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kf) Value(67)
+Enum(mips_arch_opt_value) String(r74kf2_1) Value(67)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical
+Enum(mips_arch_opt_value) String(74kf) Value(68) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kf1_1) Value(68)
+Enum(mips_arch_opt_value) String(r74kf) Value(68)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical
+Enum(mips_arch_opt_value) String(74kf1_1) Value(69) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kfx) Value(69)
+Enum(mips_arch_opt_value) String(r74kf1_1) Value(69)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical
+Enum(mips_arch_opt_value) String(74kfx) Value(70) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kx) Value(70)
+Enum(mips_arch_opt_value) String(r74kfx) Value(70)
 
 EnumValue
-Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical
+Enum(mips_arch_opt_value) String(74kx) Value(71) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r74kf3_2) Value(71)
+Enum(mips_arch_opt_value) String(r74kx) Value(71)
 
 EnumValue
-Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical
+Enum(mips_arch_opt_value) String(74kf3_2) Value(72) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r1004kc) Value(72)
+Enum(mips_arch_opt_value) String(r74kf3_2) Value(72)
 
 EnumValue
-Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical
+Enum(mips_arch_opt_value) String(1004kc) Value(73) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73)
+Enum(mips_arch_opt_value) String(r1004kc) Value(73)
 
 EnumValue
-Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical
+Enum(mips_arch_opt_value) String(1004kf2_1) Value(74) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r1004kf) Value(74)
+Enum(mips_arch_opt_value) String(r1004kf2_1) Value(74)
 
 EnumValue
-Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical
+Enum(mips_arch_opt_value) String(1004kf) Value(75) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75)
+Enum(mips_arch_opt_value) String(r1004kf) Value(75)
 
 EnumValue
-Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical
+Enum(mips_arch_opt_value) String(1004kf1_1) Value(76) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5kc) Value(76)
+Enum(mips_arch_opt_value) String(r1004kf1_1) Value(76)
 
 EnumValue
-Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical
+Enum(mips_arch_opt_value) String(5kc) Value(77) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r5kf) Value(77)
+Enum(mips_arch_opt_value) String(r5kc) Value(77)
 
 EnumValue
-Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical
+Enum(mips_arch_opt_value) String(5kf) Value(78) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(r20kc) Value(78)
+Enum(mips_arch_opt_value) String(r5kf) Value(78)
 
 EnumValue
-Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical
+Enum(mips_arch_opt_value) String(20kc) Value(79) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical
+Enum(mips_arch_opt_value) String(r20kc) Value(79)
 
 EnumValue
-Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical
+Enum(mips_arch_opt_value) String(sb1) Value(80) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(sr71k) Value(81)
+Enum(mips_arch_opt_value) String(sb1a) Value(81) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical
+Enum(mips_arch_opt_value) String(sr71000) Value(82) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical
+Enum(mips_arch_opt_value) String(sr71k) Value(82)
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical
+Enum(mips_arch_opt_value) String(xlr) Value(83) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon+) Value(85) Canonical
+Enum(mips_arch_opt_value) String(loongson3a) Value(84) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical
+Enum(mips_arch_opt_value) String(octeon) Value(85) Canonical
 
 EnumValue
-Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical
+Enum(mips_arch_opt_value) String(octeon+) Value(86) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(octeon2) Value(87) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(xlp) Value(88) Canonical
 
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 222c768b9b9..bd1d10b0e4e 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data
 		     1,           /* branch_cost */
 		     4            /* memory_latency */
   },
+  { /* R5900 */
+    COSTS_N_INSNS (4),            /* fp_add */
+    COSTS_N_INSNS (4),            /* fp_mult_sf */
+    COSTS_N_INSNS (256),          /* fp_mult_df */
+    COSTS_N_INSNS (8),            /* fp_div_sf */
+    COSTS_N_INSNS (256),          /* fp_div_df */
+    COSTS_N_INSNS (4),            /* int_mult_si */
+    COSTS_N_INSNS (256),          /* int_mult_di */
+    COSTS_N_INSNS (37),           /* int_div_si */
+    COSTS_N_INSNS (256),          /* int_div_di */
+		     1,           /* branch_cost */
+		     4            /* memory_latency */
+  },
   { /* R7000 */
     /* The only costs that are changed here are
        integer multiplication.  */
@@ -12450,7 +12463,10 @@ mips_start_ll_sc_sync_block (void)
   if (!ISA_HAS_LL_SC)
     {
       output_asm_insn (".set\tpush", 0);
-      output_asm_insn (".set\tmips2", 0);
+      if (TARGET_64BIT)
+	output_asm_insn (".set\tmips3", 0);
+      else
+	output_asm_insn (".set\tmips2", 0);
     }
 }
 
@@ -13005,6 +13021,7 @@ mips_issue_rate (void)
     case PROCESSOR_R4130:
     case PROCESSOR_R5400:
     case PROCESSOR_R5500:
+    case PROCESSOR_R5900:
     case PROCESSOR_R7000:
     case PROCESSOR_R9000:
     case PROCESSOR_OCTEON:
@@ -16025,8 +16042,9 @@ mips_reorg_process_insns (void)
     cfun->machine->all_noreorder_p = false;
 
   /* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder
-     because we rely on the assembler to work around some errata.  */
-  if (TARGET_FIX_VR4120 || TARGET_FIX_24K)
+     because we rely on the assembler to work around some errata.
+     The r5900 too has several bugs.  */
+  if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900)
     cfun->machine->all_noreorder_p = false;
 
   /* The same is true for -mfix-vr4130 if we might generate MFLO or
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index a8cf1dbedc7..ff631c1a30b 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -222,6 +222,7 @@ struct mips_cpu_info {
 #define TARGET_MIPS4130             (mips_arch == PROCESSOR_R4130)
 #define TARGET_MIPS5400             (mips_arch == PROCESSOR_R5400)
 #define TARGET_MIPS5500             (mips_arch == PROCESSOR_R5500)
+#define TARGET_MIPS5900             (mips_arch == PROCESSOR_R5900)
 #define TARGET_MIPS7000             (mips_arch == PROCESSOR_R7000)
 #define TARGET_MIPS9000             (mips_arch == PROCESSOR_R9000)
 #define TARGET_OCTEON		    (mips_arch == PROCESSOR_OCTEON	\
@@ -399,6 +400,9 @@ struct mips_cpu_info {
       if (TARGET_MCU)							\
 	builtin_define ("__mips_mcu");					\
 									\
+      if (TARGET_EVA)							\
+	builtin_define ("__mips_eva");					\
+									\
       if (TARGET_DSP)							\
 	{								\
 	  builtin_define ("__mips_dsp");				\
@@ -803,6 +807,7 @@ struct mips_cpu_info {
 #define ISA_HAS_MUL3		((TARGET_MIPS3900                       \
 				  || TARGET_MIPS5400			\
 				  || TARGET_MIPS5500			\
+				  || TARGET_MIPS5900			\
 				  || TARGET_MIPS7000			\
 				  || TARGET_MIPS9000			\
 				  || TARGET_MAD				\
@@ -817,6 +822,22 @@ struct mips_cpu_info {
 				 && TARGET_OCTEON			\
 				 && !TARGET_MIPS16)
 
+/* ISA supports instructions DMULT and DMULTU. */
+#define ISA_HAS_DMULT		(TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions MULT and MULTU.
+   This is always true, but the macro is needed for ISA_HAS_<D>MULT
+   in mips.md.  */
+#define ISA_HAS_MULT		(1)
+
+/* ISA supports instructions DDIV and DDIVU. */
+#define ISA_HAS_DDIV		(TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions DIV and DIVU.
+   This is always true, but the macro is needed for ISA_HAS_<D>DIV
+   in mips.md.  */
+#define ISA_HAS_DIV		(1)
+
 #define ISA_HAS_DIV3		((TARGET_LOONGSON_2EF			\
 				  || TARGET_LOONGSON_3A)		\
 				 && !TARGET_MIPS16)
@@ -833,7 +854,9 @@ struct mips_cpu_info {
 
 /* ISA has the integer conditional move instructions introduced in mips4 and
    ST Loongson 2E/2F.  */
-#define ISA_HAS_CONDMOVE        (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF)
+#define ISA_HAS_CONDMOVE        (ISA_HAS_FP_CONDMOVE			\
+				 || TARGET_MIPS5900			\
+				 || TARGET_LOONGSON_2EF)
 
 /* ISA has LDC1 and SDC1.  */
 #define ISA_HAS_LDC1_SDC1	(!ISA_MIPS1 && !TARGET_MIPS16)
@@ -946,6 +969,7 @@ struct mips_cpu_info {
 /* ISA has data prefetch instructions.  This controls use of 'pref'.  */
 #define ISA_HAS_PREFETCH	((ISA_MIPS4				\
 				  || TARGET_LOONGSON_2EF		\
+				  || TARGET_MIPS5900			\
 				  || ISA_MIPS32				\
 				  || ISA_MIPS32R2			\
 				  || ISA_MIPS64				\
@@ -1007,15 +1031,18 @@ struct mips_cpu_info {
    and "addiu $4,$4,1".  */
 #define ISA_HAS_LOAD_DELAY	(ISA_MIPS1				\
 				 && !TARGET_MIPS3900			\
+				 && !TARGET_MIPS5900			\
 				 && !TARGET_MIPS16			\
 				 && !TARGET_MICROMIPS)
 
 /* Likewise mtc1 and mfc1.  */
 #define ISA_HAS_XFER_DELAY	(mips_isa <= 3			\
+				 && !TARGET_MIPS5900		\
 				 && !TARGET_LOONGSON_2EF)
 
 /* Likewise floating-point comparisons.  */
 #define ISA_HAS_FCMP_DELAY	(mips_isa <= 3			\
+				 && !TARGET_MIPS5900		\
 				 && !TARGET_LOONGSON_2EF)
 
 /* True if mflo and mfhi can be immediately followed by instructions
@@ -1035,6 +1062,7 @@ struct mips_cpu_info {
 				 || ISA_MIPS64				\
 				 || ISA_MIPS64R2			\
 				 || TARGET_MIPS5500			\
+				 || TARGET_MIPS5900			\
 				 || TARGET_LOONGSON_2EF)
 
 /* ISA includes synci, jr.hb and jalr.hb.  */
@@ -1052,7 +1080,7 @@ struct mips_cpu_info {
 /* ISA includes ll and sc.  Note that this implies ISA_HAS_SYNC
    because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC
    instructions.  */
-#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16)
+#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16)
 #define GENERATE_LL_SC			\
   (target_flags_explicit & MASK_LLSC	\
    ? TARGET_LLSC && !TARGET_MIPS16	\
@@ -1125,6 +1153,7 @@ struct mips_cpu_info {
 %{mdsp} %{mno-dsp} \
 %{mdspr2} %{mno-dspr2} \
 %{mmcu} %{mno-mcu} \
+%{meva} %{mno-eva} \
 %{msmartmips} %{mno-smartmips} \
 %{mmt} %{mno-mt} \
 %{mfix-vr4120} %{mfix-vr4130} \
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 6f6484b0d8c..ce322d8bc36 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -55,6 +55,7 @@
   r5000
   r5400
   r5500
+  r5900
   r7000
   r8000
   r9000
@@ -406,8 +407,12 @@
 
 ;; Is this an extended instruction in mips16 mode?
 (define_attr "extended_mips16" "no,yes"
-  (if_then_else (ior (eq_attr "move_type" "sll0")
-		     (eq_attr "jal" "direct"))
+  (if_then_else (ior ;; In general, constant-pool loads are extended
+  		     ;; instructions.  We don't yet optimize for 16-bit
+		     ;; PC-relative references.
+  		     (eq_attr "move_type" "sll0,loadpool")
+		     (eq_attr "jal" "direct")
+		     (eq_attr "got" "load"))
 		(const_string "yes")
 		(const_string "no")))
 
@@ -420,14 +425,89 @@
 	                  (match_test "TARGET_MICROMIPS")))
 	        (const_string "yes")
 	        (const_string "no")))
-  
-;; Length of instruction in bytes.
-(define_attr "length" ""
-   (cond [(and (eq_attr "extended_mips16" "yes")
-	       (match_test "TARGET_MIPS16"))
-	  (const_int 4)
 
-	  (and (eq_attr "compression" "micromips,all")
+;; The number of individual instructions that a non-branch pattern generates,
+;; using units of BASE_INSN_LENGTH.
+(define_attr "insn_count" ""
+  (cond [;; "Ghost" instructions occupy no space.
+	 (eq_attr "type" "ghost")
+	 (const_int 0)
+
+	 ;; Extended instructions count as 2.
+   	 (and (eq_attr "extended_mips16" "yes")
+	      (match_test "TARGET_MIPS16"))
+	 (const_int 2)
+
+	 ;; A GOT load followed by an add of $gp.  This is not used for MIPS16.
+	 (eq_attr "got" "xgot_high")
+	 (const_int 2)
+
+	 ;; SHIFT_SHIFTs are decomposed into two separate instructions.
+	 ;; They are extended instructions on MIPS16 targets.
+	 (eq_attr "move_type" "shift_shift")
+	 (if_then_else (match_test "TARGET_MIPS16")
+	 	       (const_int 4)
+	 	       (const_int 2))
+
+	 ;; Check for doubleword moves that are decomposed into two
+	 ;; instructions.  The individual instructions are unextended
+	 ;; MIPS16 ones.
+	 (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
+	      (eq_attr "dword_mode" "yes"))
+	 (const_int 2)
+
+	 ;; Constants, loads and stores are handled by external routines.
+	 (and (eq_attr "move_type" "const,constN")
+	      (eq_attr "dword_mode" "yes"))
+	 (symbol_ref "mips_split_const_insns (operands[1])")
+	 (eq_attr "move_type" "const,constN")
+	 (symbol_ref "mips_const_insns (operands[1])")
+	 (eq_attr "move_type" "load,fpload")
+	 (symbol_ref "mips_load_store_insns (operands[1], insn)")
+	 (eq_attr "move_type" "store,fpstore")
+	 (symbol_ref "mips_load_store_insns (operands[0], insn)
+		      + (TARGET_FIX_24K ? 1 : 0)")
+
+	 ;; In the worst case, a call macro will take 8 instructions:
+	 ;;
+	 ;;	lui $25,%call_hi(FOO)
+	 ;;	addu $25,$25,$28
+	 ;;	lw $25,%call_lo(FOO)($25)
+	 ;;	nop
+	 ;;	jalr $25
+	 ;;	nop
+	 ;;	lw $gp,X($sp)
+	 ;;	nop
+	 (eq_attr "jal_macro" "yes")
+	 (const_int 8)
+
+	 ;; Various VR4120 errata require a nop to be inserted after a macc
+	 ;; instruction.  The assembler does this for us, so account for
+	 ;; the worst-case length here.
+	 (and (eq_attr "type" "imadd")
+	      (match_test "TARGET_FIX_VR4120"))
+	 (const_int 2)
+
+	 ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
+	 ;; the result of the second one is missed.  The assembler should work
+	 ;; around this by inserting a nop after the first dmult.
+	 (and (eq_attr "type" "imul,imul3")
+	      (eq_attr "mode" "DI")
+	      (match_test "TARGET_FIX_VR4120"))
+	 (const_int 2)
+
+	 (eq_attr "type" "idiv,idiv3")
+	 (symbol_ref "mips_idiv_insns ()")
+
+	 (not (eq_attr "sync_mem" "none"))
+	 (symbol_ref "mips_sync_loop_insns (insn, operands)")]
+	(const_int 1)))
+
+;; Length of instruction in bytes.  The default is derived from "insn_count",
+;; but there are special cases for branches (which must be handled here)
+;; and for compressed single instructions.
+(define_attr "length" ""
+   (cond [(and (eq_attr "compression" "micromips,all")
 	       (eq_attr "dword_mode" "no")
 	       (match_test "TARGET_MICROMIPS"))
 	  (const_int 2)
@@ -580,95 +660,8 @@
 		 (const_int 20)
 		 (match_test "Pmode == SImode")
 		 (const_int 16)
-		 ] (const_int 24))
-
-	  ;; "Ghost" instructions occupy no space.
-	  (eq_attr "type" "ghost")
-	  (const_int 0)
-
-	  ;; GOT loads are extended MIPS16 instructions and 4-byte
-	  ;; microMIPS instructions.
-	  (eq_attr "got" "load")
-	  (const_int 4)
-
-	  ;; A GOT load followed by an add of $gp.
-	  (eq_attr "got" "xgot_high")
-	  (const_int 8)
-
-	  ;; In general, constant-pool loads are extended instructions.
-	  (eq_attr "move_type" "loadpool")
-	  (const_int 4)
-
-	  ;; SHIFT_SHIFTs are decomposed into two separate instructions.
-	  ;; They are extended instructions on MIPS16 targets.
-	  (eq_attr "move_type" "shift_shift")
-	  (const_int 8)
-
-	  ;; Check for doubleword moves that are decomposed into two
-	  ;; instructions.  The individual instructions are unextended
-	  ;; MIPS16 ones or 2-byte microMIPS ones.
-	  (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
-	       (eq_attr "dword_mode" "yes"))
-	  (if_then_else (match_test "TARGET_COMPRESSION")
-	  		(const_int 4)
-	  		(const_int 8))
-
-	  ;; Doubleword CONST{,N} moves are split into two word
-	  ;; CONST{,N} moves.
-	  (and (eq_attr "move_type" "const,constN")
-	       (eq_attr "dword_mode" "yes"))
-	  (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH")
-
-	  ;; Otherwise, constants, loads and stores are handled by external
-	  ;; routines.
-	  (eq_attr "move_type" "const,constN")
-	  (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH")
-	  (eq_attr "move_type" "load,fpload")
-	  (symbol_ref "mips_load_store_insns (operands[1], insn)
-	  	       * BASE_INSN_LENGTH")
-	  (eq_attr "move_type" "store,fpstore")
-	  (symbol_ref "mips_load_store_insns (operands[0], insn)
-		       * BASE_INSN_LENGTH
-		       + (TARGET_FIX_24K ? NOP_INSN_LENGTH : 0)")
-
-	  ;; In the worst case, a call macro will take 8 instructions:
-	  ;;
-	  ;;	 lui $25,%call_hi(FOO)
-	  ;;	 addu $25,$25,$28
-	  ;;     lw $25,%call_lo(FOO)($25)
-	  ;;	 nop
-	  ;;	 jalr $25
-	  ;;	 nop
-	  ;;	 lw $gp,X($sp)
-	  ;;	 nop
-	  (eq_attr "jal_macro" "yes")
-	  (const_int 32)
-
-	  ;; Various VR4120 errata require a nop to be inserted after a macc
-	  ;; instruction.  The assembler does this for us, so account for
-	  ;; the worst-case length here.
-	  (and (eq_attr "type" "imadd")
-	       (match_test "TARGET_FIX_VR4120"))
-	  (const_int 8)
-
-	  ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
-	  ;; the result of the second one is missed.  The assembler should work
-	  ;; around this by inserting a nop after the first dmult.
-	  (and (eq_attr "type" "imul,imul3")
-	       (and (eq_attr "mode" "DI")
-		    (match_test "TARGET_FIX_VR4120")))
-	  (const_int 8)
-
-	  (eq_attr "type" "idiv,idiv3")
-	  (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH")
-
-	  (not (eq_attr "sync_mem" "none"))
-	  (symbol_ref "mips_sync_loop_insns (insn, operands)
-	  	       * BASE_INSN_LENGTH")
-
-	  (match_test "TARGET_MIPS16")
-	  (const_int 2)
-	  ] (const_int 4)))
+		 ] (const_int 24))]
+	 (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH")))
 
 ;; Attribute describing the processor.
 (define_enum_attr "cpu" "processor"
@@ -701,16 +694,11 @@
 	 (const_string "hilo")]
 	(const_string "none")))
 
-;; Is it a single instruction?
-(define_attr "single_insn" "no,yes"
-  (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4)
-		? SINGLE_INSN_YES : SINGLE_INSN_NO)"))
-
 ;; Can the instruction be put into a delay slot?
 (define_attr "can_delay" "no,yes"
   (if_then_else (and (eq_attr "type" "!branch,call,jump")
-		     (and (eq_attr "hazard" "none")
-			  (eq_attr "single_insn" "yes")))
+		     (eq_attr "hazard" "none")
+		     (match_test "get_attr_insn_count (insn) == 1"))
 		(const_string "yes")
 		(const_string "no")))
 
@@ -755,7 +743,9 @@
 ;; This mode iterator allows :MOVECC to be used anywhere that a
 ;; conditional-move-type condition is needed.
 (define_mode_iterator MOVECC [SI (DI "TARGET_64BIT")
-                              (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")])
+                              (CC "TARGET_HARD_FLOAT
+				   && !TARGET_LOONGSON_2EF
+				   && !TARGET_MIPS5900")])
 
 ;; 32-bit integer moves for which we provide move patterns.
 (define_mode_iterator IMOVE32
@@ -1417,7 +1407,7 @@
   "mul.<fmt>\t%0,%1,%2\;nop"
   [(set_attr "type" "fmul")
    (set_attr "mode" "<MODE>")
-   (set_attr "length" "8")])
+   (set_attr "insn_count" "2")])
 
 (define_insn "mulv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=f")
@@ -1478,7 +1468,7 @@
   [(set (match_operand:GPR 0 "register_operand")
 	(mult:GPR (match_operand:GPR 1 "register_operand")
 		  (match_operand:GPR 2 "register_operand")))]
-  ""
+  "ISA_HAS_<D>MULT"
 {
   rtx lo;
 
@@ -1524,7 +1514,7 @@
 {
   if (which_alternative == 1)
     return "<d>mult\t%1,%2";
-  if (<MODE>mode == SImode && TARGET_MIPS3900)
+  if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900))
     return "mult\t%0,%1,%2";
   return "<d>mul\t%0,%1,%2";
 }
@@ -1558,7 +1548,7 @@
   [(set (match_operand:GPR 0 "muldiv_target_operand" "=l")
 	(mult:GPR (match_operand:GPR 1 "register_operand" "d")
 		  (match_operand:GPR 2 "register_operand" "d")))]
-  "!TARGET_FIX_R4000"
+  "ISA_HAS_<D>MULT && !TARGET_FIX_R4000"
   "<d>mult\t%1,%2"
   [(set_attr "type" "imul")
    (set_attr "mode" "<MODE>")])
@@ -1568,11 +1558,11 @@
 	(mult:GPR (match_operand:GPR 1 "register_operand" "d")
 		  (match_operand:GPR 2 "register_operand" "d")))
    (clobber (match_scratch:GPR 3 "=l"))]
-  "TARGET_FIX_R4000"
+  "ISA_HAS_<D>MULT && TARGET_FIX_R4000"
   "<d>mult\t%1,%2\;mflo\t%0"
   [(set_attr "type" "imul")
    (set_attr "mode" "<MODE>")
-   (set_attr "length" "8")])
+   (set_attr "insn_count" "2")])
 
 ;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead
 ;; of "mult; mflo".  They have the same latency, but the first form gives
@@ -1632,7 +1622,7 @@
   [(set_attr "type"	"imadd")
    (set_attr "accum_in"	"3")
    (set_attr "mode"	"SI")
-   (set_attr "length"	"4,8")])
+   (set_attr "insn_count" "1,2")])
 
 ;; The same idea applies here.  The middle alternative needs one less
 ;; clobber than the final alternative, so we add "*?" as a counterweight.
@@ -1651,7 +1641,7 @@
   [(set_attr "type"	"imadd")
    (set_attr "accum_in"	"3")
    (set_attr "mode"	"SI")
-   (set_attr "length"	"4,4,8")])
+   (set_attr "insn_count" "1,1,2")])
 
 ;; Split *mul_acc_si if both the source and destination accumulator
 ;; values are GPRs.
@@ -1732,7 +1722,7 @@
   ""
   [(set_attr "type"     "imadd")
    (set_attr "accum_in"	"1")
-   (set_attr "length"	"8")])
+   (set_attr "insn_count" "2")])
 
 ;; Patterns generated by the define_peephole2 below.
 
@@ -1868,7 +1858,7 @@
   [(set_attr "type"     "imadd")
    (set_attr "accum_in"	"1")
    (set_attr "mode"     "SI")
-   (set_attr "length"   "4,8")])
+   (set_attr "insn_count" "1,2")])
 
 ;; Split *mul_sub_si if both the source and destination accumulator
 ;; values are GPRs.
@@ -1949,7 +1939,7 @@
   "mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
   [(set_attr "type" "imul")
    (set_attr "mode" "SI")
-   (set_attr "length" "12")])
+   (set_attr "insn_count" "3")])
 
 (define_insn_and_split "<u>mulsidi3_64bit"
   [(set (match_operand:DI 0 "register_operand" "=d")
@@ -1968,10 +1958,10 @@
 }
   [(set_attr "type" "imul")
    (set_attr "mode" "SI")
-   (set (attr "length")
+   (set (attr "insn_count")
 	(if_then_else (match_test "ISA_HAS_EXT_INS")
-		      (const_int 16)
-		      (const_int 28)))])
+		      (const_int 4)
+		      (const_int 7)))])
 
 (define_expand "<u>mulsidi3_64bit_mips16"
   [(set (match_operand:DI 0 "register_operand")
@@ -2035,7 +2025,7 @@
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
 		 (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))
    (clobber (match_scratch:DI 3 "=l"))]
-  "TARGET_64BIT && ISA_HAS_DMUL3"
+  "ISA_HAS_DMUL3"
   "dmul\t%0,%1,%2"
   [(set_attr "type" "imul3")
    (set_attr "mode" "DI")])
@@ -2122,7 +2112,7 @@
 }
   [(set_attr "type" "imul")
    (set_attr "mode" "SI")
-   (set_attr "length" "8")])
+   (set_attr "insn_count" "2")])
 
 (define_expand "<su>mulsi3_highpart_split"
   [(set (match_operand:SI 0 "register_operand")
@@ -2189,7 +2179,7 @@
 	  (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
 		   (any_extend:TI (match_operand:DI 2 "register_operand")))
 	  (const_int 64))))]
-  "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+  "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
 {
   if (TARGET_MIPS16)
     emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1],
@@ -2208,7 +2198,7 @@
 		   (any_extend:TI (match_operand:DI 2 "register_operand" "d")))
 	  (const_int 64))))
    (clobber (match_scratch:DI 3 "=l"))]
-  "TARGET_64BIT
+  "ISA_HAS_DMULT
    && !TARGET_MIPS16
    && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
   { return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; }
@@ -2221,7 +2211,7 @@
 }
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")
-   (set_attr "length" "8")])
+   (set_attr "insn_count" "2")])
 
 (define_expand "<su>muldi3_highpart_split"
   [(set (match_operand:DI 0 "register_operand")
@@ -2244,7 +2234,7 @@
   [(set (match_operand:TI 0 "register_operand")
 	(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
 		 (any_extend:TI (match_operand:DI 2 "register_operand"))))]
-  "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+  "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
 {
   rtx hilo;
 
@@ -2266,7 +2256,7 @@
   [(set (match_operand:TI 0 "muldiv_target_operand" "=x")
 	(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
 		 (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))]
-  "TARGET_64BIT
+  "ISA_HAS_DMULT
    && !TARGET_FIX_R4000
    && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
   "dmult<u>\t%1,%2"
@@ -2278,13 +2268,13 @@
 	(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
 		 (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))
    (clobber (match_scratch:TI 3 "=x"))]
-  "TARGET_64BIT
+  "ISA_HAS_DMULT
    && TARGET_FIX_R4000
    && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
   "dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")
-   (set_attr "length" "12")])
+   (set_attr "insn_count" "3")])
 
 ;; The R4650 supports a 32-bit multiply/ 64-bit accumulate
 ;; instruction.  The HI/LO registers are used as a 64-bit accumulator.
@@ -2535,10 +2525,10 @@
 }
   [(set_attr "type" "fdiv")
    (set_attr "mode" "<UNITMODE>")
-   (set (attr "length")
+   (set (attr "insn_count")
         (if_then_else (match_test "TARGET_FIX_SB1")
-                      (const_int 8)
-                      (const_int 4)))])
+                      (const_int 2)
+                      (const_int 1)))])
 
 (define_insn "*recip<mode>3"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2553,10 +2543,10 @@
 }
   [(set_attr "type" "frdiv")
    (set_attr "mode" "<UNITMODE>")
-   (set (attr "length")
+   (set (attr "insn_count")
         (if_then_else (match_test "TARGET_FIX_SB1")
-                      (const_int 8)
-                      (const_int 4)))])
+                      (const_int 2)
+                      (const_int 1)))])
 
 ;; VR4120 errata MD(A1): signed division instructions do not work correctly
 ;; with negative operands.  We use special libgcc functions instead.
@@ -2574,7 +2564,7 @@
    (set (match_operand:GPR 3 "register_operand" "=d")
 	(mod:GPR (match_dup 1)
 		 (match_dup 2)))]
-  "!TARGET_FIX_VR4120"
+  "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120"
   "#"
   "&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)"
   [(const_int 0)]
@@ -2586,7 +2576,8 @@
 }
  [(set_attr "type" "idiv")
   (set_attr "mode" "<MODE>")
-  (set_attr "length" "8")])
+  ;; Worst case for MIPS16.
+  (set_attr "insn_count" "3")])
 
 ;; See the comment above "divmod<mode>4" for the MIPS16 handling.
 (define_insn_and_split "udivmod<mode>4"
@@ -2596,7 +2587,7 @@
    (set (match_operand:GPR 3 "register_operand" "=d")
 	(umod:GPR (match_dup 1)
 		  (match_dup 2)))]
-  ""
+  "ISA_HAS_<D>DIV"
   "#"
   "(TARGET_MIPS16 && cse_not_expected) || reload_completed"
   [(const_int 0)]
@@ -2606,9 +2597,10 @@
     emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
   DONE;
 }
- [(set_attr "type" "idiv")
-  (set_attr "mode" "<MODE>")
-  (set_attr "length" "8")])
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "<MODE>")
+   ;; Worst case for MIPS16.
+   (set_attr "insn_count" "3")])
 
 (define_expand "<u>divmod<mode>4_split"
   [(set (match_operand:GPR 0 "register_operand")
@@ -2641,7 +2633,7 @@
 	  [(any_div:GPR (match_operand:GPR 1 "register_operand" "d")
 			(match_operand:GPR 2 "register_operand" "d"))]
 	  UNSPEC_SET_HILO))]
-  ""
+  "ISA_HAS_<GPR:D>DIV"
   { return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); }
   [(set_attr "type" "idiv")
    (set_attr "mode" "<GPR:MODE>")])
@@ -2668,10 +2660,10 @@
 }
   [(set_attr "type" "fsqrt")
    (set_attr "mode" "<UNITMODE>")
-   (set (attr "length")
+   (set (attr "insn_count")
         (if_then_else (match_test "TARGET_FIX_SB1")
-                      (const_int 8)
-                      (const_int 4)))])
+                      (const_int 2)
+                      (const_int 1)))])
 
 (define_insn "*rsqrt<mode>a"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2686,10 +2678,10 @@
 }
   [(set_attr "type" "frsqrt")
    (set_attr "mode" "<UNITMODE>")
-   (set (attr "length")
+   (set (attr "insn_count")
         (if_then_else (match_test "TARGET_FIX_SB1")
-                      (const_int 8)
-                      (const_int 4)))])
+                      (const_int 2)
+                      (const_int 1)))])
 
 (define_insn "*rsqrt<mode>b"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2704,10 +2696,10 @@
 }
   [(set_attr "type" "frsqrt")
    (set_attr "mode" "<UNITMODE>")
-   (set (attr "length")
+   (set (attr "insn_count")
         (if_then_else (match_test "TARGET_FIX_SB1")
-                      (const_int 8)
-                      (const_int 4)))])
+                      (const_int 2)
+                      (const_int 1)))])
 
 ;;
 ;;  ....................
@@ -3500,7 +3492,7 @@
   [(set_attr "type"	"fcvt")
    (set_attr "mode"	"DF")
    (set_attr "cnv_mode"	"D2I")
-   (set_attr "length"	"36")])
+   (set_attr "insn_count" "9")])
 
 (define_expand "fix_truncsfsi2"
   [(set (match_operand:SI 0 "register_operand")
@@ -3537,7 +3529,7 @@
   [(set_attr "type"	"fcvt")
    (set_attr "mode"	"SF")
    (set_attr "cnv_mode"	"S2I")
-   (set_attr "length"	"36")])
+   (set_attr "insn_count" "9")])
 
 
 (define_insn "fix_truncdfdi2"
@@ -4015,7 +4007,7 @@
   operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
   operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID);
 }
-  [(set_attr "length" "20")])
+  [(set_attr "insn_count" "5")])
 
 ;; Use a scratch register to reduce the latency of the above pattern
 ;; on superscalar machines.  The optimized sequence is:
@@ -4070,7 +4062,7 @@
   operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
   operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW);
 }
-  [(set_attr "length" "24")])
+  [(set_attr "insn_count" "6")])
 
 ;; Split HIGHs into:
 ;;
@@ -5080,7 +5072,7 @@
     return ".cprestore\t%1";
 }
   [(set_attr "type" "store")
-   (set_attr "length" "4,12")])
+   (set_attr "insn_count" "1,3")])
 
 (define_insn "use_cprestore_<mode>"
   [(set (reg:P CPRESTORE_SLOT_REGNUM)
@@ -5141,7 +5133,7 @@
          "\tjr.hb\t$31\n"
          "\tnop%>%)";
 }
-  [(set_attr "length" "20")])
+  [(set_attr "insn_count" "5")])
 
 ;; Cache operations for R4000-style caches.
 (define_insn "mips_cache"
@@ -5334,8 +5326,7 @@
 ;; not have and immediate).  We recognize a shift of a load in order
 ;; to make it simple enough for combine to understand.
 ;;
-;; The length here is the worst case: the length of the split version
-;; will be more accurate.
+;; The instruction count here is the worst case.
 (define_insn_and_split ""
   [(set (match_operand:SI 0 "register_operand" "=d")
 	(lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
@@ -5348,7 +5339,8 @@
   ""
   [(set_attr "type"	"load")
    (set_attr "mode"	"SI")
-   (set_attr "length"	"8")])
+   (set (attr "insn_count")
+	(symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))])
 
 (define_insn "rotr<mode>3"
   [(set (match_operand:GPR 0 "register_operand" "=d")
@@ -5987,7 +5979,7 @@
   
   return "j\t%4";
 }
-  [(set_attr "length" "32")])
+  [(set_attr "insn_count" "16")])
 
 ;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well.
 ;; While it is possible to either pull it off the stack (in the
@@ -6878,11 +6870,8 @@
    (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
   ""
   [(set_attr "type" "unknown")
-   ; Since rdhwr always generates a trap for now, putting it in a delay
-   ; slot would make the kernel's emulation of it much slower.
-   (set_attr "can_delay" "no")
    (set_attr "mode" "<MODE>")
-   (set_attr "length" "8")])
+   (set_attr "insn_count" "2")])
 
 (define_insn "*tls_get_tp_<mode>_split"
   [(set (reg:P TLS_GET_TP_REGNUM)
@@ -6890,7 +6879,8 @@
   "HAVE_AS_TLS && !TARGET_MIPS16"
   ".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop"
   [(set_attr "type" "unknown")
-   ; See tls_get_tp_<mode>
+   ; Since rdhwr always generates a trap for now, putting it in a delay
+   ; slot would make the kernel's emulation of it much slower.
    (set_attr "can_delay" "no")
    (set_attr "mode" "<MODE>")])
 
@@ -6922,7 +6912,7 @@
    (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
   ""
   [(set_attr "type" "multi")
-   (set_attr "length" "8")
+   (set_attr "insn_count" "4")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*tls_get_tp_mips16_call_<mode>"
@@ -6934,7 +6924,7 @@
   "HAVE_AS_TLS && TARGET_MIPS16"
   { return MIPS_CALL ("jal", operands, 0, -1); }
   [(set_attr "type" "call")
-   (set_attr "length" "6")
+   (set_attr "insn_count" "3")
    (set_attr "mode" "<MODE>")])
 
 ;; Named pattern for expanding thread pointer reference.
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e11710db3c0..08ab29b1810 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -141,6 +141,10 @@ membedded-data
 Target Report Var(TARGET_EMBEDDED_DATA)
 Use ROM instead of RAM
 
+meva
+Target Report Var(TARGET_EVA)
+Use Enhanced Virtual Addressing instructions
+
 mexplicit-relocs
 Target Report Mask(EXPLICIT_RELOCS)
 Use NewABI-style %reloc() assembly operators
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
index 1af09e559b0..bd37067dfc4 100644
--- a/gcc/config/mmix/mmix.c
+++ b/gcc/config/mmix/mmix.c
@@ -313,7 +313,7 @@ mmix_init_machine_status (void)
   return ggc_alloc_cleared_machine_function ();
 }
 
-/* DATA_ALIGNMENT.
+/* DATA_ABI_ALIGNMENT.
    We have trouble getting the address of stuff that is located at other
    than 32-bit alignments (GETA requirements), so try to give everything
    at least 32-bit alignment.  */
diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h
index 4ca1a2b8c86..c5edc5777a9 100644
--- a/gcc/config/mmix/mmix.h
+++ b/gcc/config/mmix/mmix.h
@@ -164,7 +164,7 @@ struct GTY(()) machine_function
 /* Copied from elfos.h.  */
 #define MAX_OFILE_ALIGNMENT (32768 * 8)
 
-#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \
+#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \
  mmix_data_alignment (TYPE, BASIC_ALIGN)
 
 #define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 2e18bebf3d8..c2ed7389bc4 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace)
     }
 }
 
+/* Returns TRUE for valid addresses.  */
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode
+static bool
+rl78_valid_pointer_mode (enum machine_mode m)
+{
+  return (m == HImode || m == SImode);
+}
+
 /* Return the appropriate mode for a named address address.  */
 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
 #define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode
@@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 }
 
 
+
+#undef  TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode
+
+static enum machine_mode
+rl78_unwind_word_mode (void)
+{
+  return HImode;
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rl78.h"
diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md
index b3cfe6d1bbc..efc26210498 100644
--- a/gcc/config/rl78/rl78.md
+++ b/gcc/config/rl78/rl78.md
@@ -235,6 +235,24 @@
   [(set_attr "valloc" "macax")]
 )
 
+(define_expand "mulqi3"
+  [(set (match_operand:QI          0 "register_operand" "")
+	(mult:QI  (match_operand:QI 1 "general_operand" "")
+		  (match_operand:QI 2 "nonmemory_operand" "")))
+   ]
+  "" ; mulu supported by all targets
+  ""
+)
+
+(define_expand "mulhi3"
+  [(set (match_operand:HI          0 "register_operand" "")
+	(mult:HI (match_operand:HI 1 "general_operand" "")
+		 (match_operand:HI 2 "nonmemory_operand" "")))
+   ]
+  "! RL78_MUL_NONE"
+  ""
+)
+
 (define_expand "mulsi3"
   [(set (match_operand:SI          0 "register_operand" "=&v")
 	(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
@@ -244,6 +262,58 @@
   ""
 )
 
+(define_insn "*mulqi3_rl78"
+  [(set (match_operand:QI          0 "register_operand" "=&v")
+	(mult:QI (match_operand:QI 1 "general_operand" "+viU")
+		 (match_operand:QI 2 "general_operand" "vi")))
+   ]
+  "" ; mulu supported by all targets
+  "; mulqi macro %0 = %1 * %2
+	mov    a, %h1
+	mov    x, a
+	mov    a, %h2
+	mulu   x ; ax = a * x
+	mov    a, x
+	mov    %h0, a
+	; end of mulqi macro"
+;;  [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_rl78"
+  [(set (match_operand:HI          0 "register_operand" "=&v")
+	(mult:HI (match_operand:HI 1 "general_operand" "+viU")
+		 (match_operand:HI 2 "general_operand" "vi")))
+   ]
+  "RL78_MUL_RL78"
+  "; mulhi macro %0 = %1 * %2
+	movw    ax, %h1
+	movw    bc, %h2
+	mulhu   ; bcax = bc * ax
+	movw    %h0, ax
+	; end of mulhi macro"
+;;  [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_g13"
+  [(set (match_operand:HI          0 "register_operand" "=&v")
+	(mult:HI (match_operand:HI 1 "general_operand" "+viU")
+		 (match_operand:HI 2 "general_operand" "vi")))
+   ]
+  "RL78_MUL_G13"
+  "; mulhi macro %0 = %1 * %2
+	mov     a, #0x00
+	mov     !0xf00e8, a     ; MDUC
+	movw    ax, %h1
+	movw    0xffff0, ax     ; MDAL
+	movw    ax, %h2
+	movw    0xffff2, ax     ; MDAH
+	nop     ; mdb = mdal * mdah
+	movw    ax, 0xffff6     ; MDBL
+	movw    %h0, ax
+        ; end of mulhi macro"
+;;  [(set_attr "valloc" "umul")]
+)
+
 ;; 0xFFFF0 is MACR(L).  0xFFFF2 is MACR(H) but we don't care about it
 ;; because we're only using the lower 16 bits (which is the upper 16
 ;; bits of the result).
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 19a4ae9297d..4b91c5c5e24 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -323,15 +323,31 @@
 
 #ifdef _ARCH_PWR8
 /* Vector additions added in ISA 2.07.  */
+#define vec_eqv __builtin_vec_eqv
+#define vec_nand __builtin_vec_nand
+#define vec_orc __builtin_vec_orc
 #define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vclz __builtin_vec_vclz
+#define vec_vclzb __builtin_vec_vclzb
+#define vec_vclzd __builtin_vec_vclzd
+#define vec_vclzh __builtin_vec_vclzh
+#define vec_vclzw __builtin_vec_vclzw
+#define vec_vgbbd __builtin_vec_vgbbd
 #define vec_vmaxsd __builtin_vec_vmaxsd
 #define vec_vmaxud __builtin_vec_vmaxud
 #define vec_vminsd __builtin_vec_vminsd
 #define vec_vminud __builtin_vec_vminud
+#define vec_vmrgew __builtin_vec_vmrgew
+#define vec_vmrgow __builtin_vec_vmrgow
 #define vec_vpksdss __builtin_vec_vpksdss
 #define vec_vpksdus __builtin_vec_vpksdus
 #define vec_vpkudum __builtin_vec_vpkudum
 #define vec_vpkudus __builtin_vec_vpkudus
+#define vec_vpopcnt __builtin_vec_vpopcnt
+#define vec_vpopcntb __builtin_vec_vpopcntb
+#define vec_vpopcntd __builtin_vec_vpopcntd
+#define vec_vpopcnth __builtin_vec_vpopcnth
+#define vec_vpopcntw __builtin_vec_vpopcntw
 #define vec_vrld __builtin_vec_vrld
 #define vec_vsld __builtin_vec_vsld
 #define vec_vsrad __builtin_vec_vsrad
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 78d29001440..6607e450be3 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -128,6 +128,7 @@
    UNSPEC_VUPKLS_V4SF
    UNSPEC_VUPKHU_V4SF
    UNSPEC_VUPKLU_V4SF
+   UNSPEC_VGBBD
 ])
 
 (define_c_enum "unspecv"
@@ -941,6 +942,31 @@
   "vmrglw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
+;; Power8 vector merge even/odd
+(define_insn "p8_vmrgew"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(vec_select:V4SI
+	  (vec_concat:V8SI
+	    (match_operand:V4SI 1 "register_operand" "v")
+	    (match_operand:V4SI 2 "register_operand" "v"))
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 2) (const_int 6)])))]
+  "TARGET_P8_VECTOR"
+  "vmrgew %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "p8_vmrgow"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(vec_select:V4SI
+	  (vec_concat:V8SI
+	    (match_operand:V4SI 1 "register_operand" "v")
+	    (match_operand:V4SI 2 "register_operand" "v"))
+	  (parallel [(const_int 1) (const_int 5)
+		     (const_int 3) (const_int 7)])))]
+  "TARGET_P8_VECTOR"
+  "vmrgow %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
 (define_insn "vec_widen_umult_even_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1017,10 +1043,13 @@
 ;; logical ops.  Have the logical ops follow the memory ops in
 ;; terms of whether to prefer VSX or Altivec
 
+;; AND has a clobber to be consistant with VSX, which adds splitters for using
+;; the GPR registers.
 (define_insn "*altivec_and<mode>3"
   [(set (match_operand:VM 0 "register_operand" "=v")
         (and:VM (match_operand:VM 1 "register_operand" "v")
-		(match_operand:VM 2 "register_operand" "v")))]
+		(match_operand:VM 2 "register_operand" "v")))
+   (clobber (match_scratch:CC 3 "=X"))]
   "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
   "vand %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -1050,8 +1079,8 @@
   
 (define_insn "*altivec_nor<mode>3"
   [(set (match_operand:VM 0 "register_operand" "=v")
-        (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
-			(match_operand:VM 2 "register_operand" "v"))))]
+	(and:VM (not:VM (match_operand:VM 1 "register_operand" "v"))
+		(not:VM (match_operand:VM 2 "register_operand" "v"))))]
   "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
   "vnor %0,%1,%2"
   [(set_attr "type" "vecsimple")])
@@ -2370,3 +2399,34 @@
   emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
   DONE;
 }")
+
+
+;; Power8 vector instructions encoded as Altivec instructions
+
+;; Vector count leading zeros
+(define_insn "*p8v_clz<mode>2"
+  [(set (match_operand:VI2 0 "register_operand" "=v")
+	(clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+  "TARGET_P8_VECTOR"
+  "vclz<wd> %0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
+
+;; Vector population count
+(define_insn "*p8v_popcount<mode>2"
+  [(set (match_operand:VI2 0 "register_operand" "=v")
+        (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+  "TARGET_P8_VECTOR"
+  "vpopcnt<wd> %0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
+
+;; Vector Gather Bits by Bytes by Doubleword
+(define_insn "p8v_vgbbd"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+		      UNSPEC_VGBBD))]
+  "TARGET_P8_VECTOR"
+  "vgbbd %0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c
index e608dce184c..1a173d0b1cc 100644
--- a/gcc/config/rs6000/driver-rs6000.c
+++ b/gcc/config/rs6000/driver-rs6000.c
@@ -167,7 +167,7 @@ elf_platform (void)
 
   if (fd != -1)
     {
-      char buf[1024];
+      static char buf[1024];
       ElfW(auxv_t) *av;
       ssize_t n;
 
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 3f280581feb..79f0f0b5f00 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -136,8 +136,11 @@ extern int dot_symbols;
 		SET_CMODEL (CMODEL_MEDIUM);			\
 	      if (rs6000_current_cmodel != CMODEL_SMALL)	\
 		{						\
-		  TARGET_NO_FP_IN_TOC = 0;			\
-		  TARGET_NO_SUM_IN_TOC = 0;			\
+		  if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \
+		    TARGET_NO_FP_IN_TOC				\
+		      = rs6000_current_cmodel == CMODEL_MEDIUM;	\
+		  if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \
+		    TARGET_NO_SUM_IN_TOC = 0;			\
 		}						\
 	    }							\
 	}							\
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 078c9387350..12a602b78c4 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -207,7 +207,7 @@
   if (!REG_P (op))
     return 0;
 
-  if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
+  if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
     return 1;
 
   return INT_REGNO_P (REGNO (op));
@@ -1121,9 +1121,16 @@
 						   GET_MODE (XEXP (op, 0))),
 			  1"))))
 
+;; Return 1 if OP is a valid comparison operator for "cbranch" instructions.
+;; If we're assuming that FP operations cannot generate user-visible traps,
+;; then on e500 we can use the ordered-signaling instructions to implement
+;; the unordered-quiet FP comparison predicates modulo a reversal.
 (define_predicate "rs6000_cbranch_operator"
   (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS")
-		(match_operand 0 "ordered_comparison_operator")
+		(if_then_else (match_test "flag_trapping_math")
+			      (match_operand 0 "ordered_comparison_operator")
+			      (ior (match_operand 0 "ordered_comparison_operator")
+				   (match_code ("unlt,unle,ungt,unge"))))
 		(match_operand 0 "comparison_operator")))
 
 ;; Return 1 if OP is a comparison operation that is valid for an SCC insn --
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 7a80eff8924..1a5a709751d 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1234,10 +1234,23 @@ BU_VSX_OVERLOAD_2 (XXSPLTW,  "xxspltw")
 BU_VSX_OVERLOAD_X (LD,	     "ld")
 BU_VSX_OVERLOAD_X (ST,	     "st")
 
+/* 1 argument VSX instructions added in ISA 2.07.  */
+BU_P8V_VSX_1 (XSCVSPDPN,      "xscvspdpn",	CONST,	vsx_xscvspdpn)
+BU_P8V_VSX_1 (XSCVDPSPN,      "xscvdpspn",	CONST,	vsx_xscvdpspn)
+
 /* 1 argument altivec instructions added in ISA 2.07.  */
 BU_P8V_AV_1 (ABS_V2DI,	      "abs_v2di",	CONST,	absv2di2)
 BU_P8V_AV_1 (VUPKHSW,	      "vupkhsw",	CONST,	altivec_vupkhsw)
 BU_P8V_AV_1 (VUPKLSW,	      "vupklsw",	CONST,	altivec_vupklsw)
+BU_P8V_AV_1 (VCLZB,	      "vclzb",		CONST,  clzv16qi2)
+BU_P8V_AV_1 (VCLZH,	      "vclzh",		CONST,  clzv8hi2)
+BU_P8V_AV_1 (VCLZW,	      "vclzw",		CONST,  clzv4si2)
+BU_P8V_AV_1 (VCLZD,	      "vclzd",		CONST,  clzv2di2)
+BU_P8V_AV_1 (VPOPCNTB,	      "vpopcntb",	CONST,  popcountv16qi2)
+BU_P8V_AV_1 (VPOPCNTH,	      "vpopcnth",	CONST,  popcountv8hi2)
+BU_P8V_AV_1 (VPOPCNTW,	      "vpopcntw",	CONST,  popcountv4si2)
+BU_P8V_AV_1 (VPOPCNTD,	      "vpopcntd",	CONST,  popcountv2di2)
+BU_P8V_AV_1 (VGBBD,	      "vgbbd",		CONST,  p8v_vgbbd)
 
 /* 2 argument altivec instructions added in ISA 2.07.  */
 BU_P8V_AV_2 (VADDUDM,		"vaddudm",	CONST,	addv2di3)
@@ -1245,6 +1258,8 @@ BU_P8V_AV_2 (VMINSD,		"vminsd",	CONST,	sminv2di3)
 BU_P8V_AV_2 (VMAXSD,		"vmaxsd",	CONST,	smaxv2di3)
 BU_P8V_AV_2 (VMINUD,		"vminud",	CONST,	uminv2di3)
 BU_P8V_AV_2 (VMAXUD,		"vmaxud",	CONST,	umaxv2di3)
+BU_P8V_AV_2 (VMRGEW,		"vmrgew",	CONST,	p8_vmrgew)
+BU_P8V_AV_2 (VMRGOW,		"vmrgow",	CONST,	p8_vmrgow)
 BU_P8V_AV_2 (VPKUDUM,		"vpkudum",	CONST,	altivec_vpkudum)
 BU_P8V_AV_2 (VPKSDSS,		"vpksdss",	CONST,	altivec_vpksdss)
 BU_P8V_AV_2 (VPKUDUS,		"vpkudus",	CONST,	altivec_vpkudus)
@@ -1255,6 +1270,27 @@ BU_P8V_AV_2 (VSRD,		"vsrd",		CONST,	vlshrv2di3)
 BU_P8V_AV_2 (VSRAD,		"vsrad",	CONST,	vashrv2di3)
 BU_P8V_AV_2 (VSUBUDM,		"vsubudm",	CONST,	subv2di3)
 
+BU_P8V_AV_2 (EQV_V16QI,		"eqv_v16qi",	CONST,	eqvv16qi3)
+BU_P8V_AV_2 (EQV_V8HI,		"eqv_v8hi",	CONST,	eqvv8hi3)
+BU_P8V_AV_2 (EQV_V4SI,		"eqv_v4si",	CONST,	eqvv4si3)
+BU_P8V_AV_2 (EQV_V2DI,		"eqv_v2di",	CONST,	eqvv2di3)
+BU_P8V_AV_2 (EQV_V4SF,		"eqv_v4sf",	CONST,	eqvv4sf3)
+BU_P8V_AV_2 (EQV_V2DF,		"eqv_v2df",	CONST,	eqvv2df3)
+
+BU_P8V_AV_2 (NAND_V16QI,	"nand_v16qi",	CONST,	nandv16qi3)
+BU_P8V_AV_2 (NAND_V8HI,		"nand_v8hi",	CONST,	nandv8hi3)
+BU_P8V_AV_2 (NAND_V4SI,		"nand_v4si",	CONST,	nandv4si3)
+BU_P8V_AV_2 (NAND_V2DI,		"nand_v2di",	CONST,	nandv2di3)
+BU_P8V_AV_2 (NAND_V4SF,		"nand_v4sf",	CONST,	nandv4sf3)
+BU_P8V_AV_2 (NAND_V2DF,		"nand_v2df",	CONST,	nandv2df3)
+
+BU_P8V_AV_2 (ORC_V16QI,		"orc_v16qi",	CONST,	orcv16qi3)
+BU_P8V_AV_2 (ORC_V8HI,		"orc_v8hi",	CONST,	orcv8hi3)
+BU_P8V_AV_2 (ORC_V4SI,		"orc_v4si",	CONST,	orcv4si3)
+BU_P8V_AV_2 (ORC_V2DI,		"orc_v2di",	CONST,	orcv2di3)
+BU_P8V_AV_2 (ORC_V4SF,		"orc_v4sf",	CONST,	orcv4sf3)
+BU_P8V_AV_2 (ORC_V2DF,		"orc_v2df",	CONST,	orcv2df3)
+
 /* Vector comparison instructions added in ISA 2.07.  */
 BU_P8V_AV_2 (VCMPEQUD,		"vcmpequd",	CONST,	vector_eqv2di)
 BU_P8V_AV_2 (VCMPGTSD,		"vcmpgtsd",	CONST,	vector_gtv2di)
@@ -1268,13 +1304,29 @@ BU_P8V_AV_P (VCMPGTUD_P,	"vcmpgtud_p",	CONST,	vector_gtu_v2di_p)
 /* ISA 2.07 vector overloaded 1 argument functions.  */
 BU_P8V_OVERLOAD_1 (VUPKHSW,	"vupkhsw")
 BU_P8V_OVERLOAD_1 (VUPKLSW,	"vupklsw")
+BU_P8V_OVERLOAD_1 (VCLZ,	"vclz")
+BU_P8V_OVERLOAD_1 (VCLZB,	"vclzb")
+BU_P8V_OVERLOAD_1 (VCLZH,	"vclzh")
+BU_P8V_OVERLOAD_1 (VCLZW,	"vclzw")
+BU_P8V_OVERLOAD_1 (VCLZD,	"vclzd")
+BU_P8V_OVERLOAD_1 (VPOPCNT,	"vpopcnt")
+BU_P8V_OVERLOAD_1 (VPOPCNTB,	"vpopcntb")
+BU_P8V_OVERLOAD_1 (VPOPCNTH,	"vpopcnth")
+BU_P8V_OVERLOAD_1 (VPOPCNTW,	"vpopcntw")
+BU_P8V_OVERLOAD_1 (VPOPCNTD,	"vpopcntd")
+BU_P8V_OVERLOAD_1 (VGBBD,	"vgbbd")
 
 /* ISA 2.07 vector overloaded 2 argument functions.  */
+BU_P8V_OVERLOAD_2 (EQV,		"eqv")
+BU_P8V_OVERLOAD_2 (NAND,	"nand")
+BU_P8V_OVERLOAD_2 (ORC,		"orc")
 BU_P8V_OVERLOAD_2 (VADDUDM,	"vaddudm")
 BU_P8V_OVERLOAD_2 (VMAXSD,	"vmaxsd")
 BU_P8V_OVERLOAD_2 (VMAXUD,	"vmaxud")
 BU_P8V_OVERLOAD_2 (VMINSD,	"vminsd")
 BU_P8V_OVERLOAD_2 (VMINUD,	"vminud")
+BU_P8V_OVERLOAD_2 (VMRGEW,	"vmrgew")
+BU_P8V_OVERLOAD_2 (VMRGOW,	"vmrgow")
 BU_P8V_OVERLOAD_2 (VPKSDSS,	"vpksdss")
 BU_P8V_OVERLOAD_2 (VPKSDUS,	"vpksdus")
 BU_P8V_OVERLOAD_2 (VPKUDUM,	"vpkudum")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 801b5bb225d..593b772ebd1 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3515,6 +3515,404 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
     RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
 
+  /* Power8 vector overloaded functions.  */
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_bool_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+    RS6000_BTI_unsigned_V16QI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_bool_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+    RS6000_BTI_unsigned_V8HI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_bool_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+  { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+
+  { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+  { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+    RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+    RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+    RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+  { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+    RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+    RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
+    RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+  { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+    RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+  { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+    RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+  { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+    RS6000_BTI_V16QI, 0, 0, 0 },
+  { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+    RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
+
   /* Crypto builtins.  */
   { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
@@ -3822,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       TREE_USED (decl) = 1;
       TREE_TYPE (decl) = arg1_type;
       TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
-      DECL_INITIAL (decl) = arg1;
-      stmt = build1 (DECL_EXPR, arg1_type, decl);
-      TREE_ADDRESSABLE (decl) = 1;
-      SET_EXPR_LOCATION (stmt, loc);
-      stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
 
       innerptrtype = build_pointer_type (arg1_inner_type);
 
@@ -3901,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       TREE_USED (decl) = 1;
       TREE_TYPE (decl) = arg1_type;
       TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
-      DECL_INITIAL (decl) = arg1;
-      stmt = build1 (DECL_EXPR, arg1_type, decl);
-      TREE_ADDRESSABLE (decl) = 1;
-      SET_EXPR_LOCATION (stmt, loc);
-      stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
 
       innerptrtype = build_pointer_type (arg1_inner_type);
 
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index e143a4ca203..d528a4fd87a 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -30,21 +30,22 @@
 /* Processor type.  Order must match cpu attribute in MD file.  */
 enum processor_type
  {
-   PROCESSOR_RS64A,
-   PROCESSOR_MPCCORE,
-   PROCESSOR_PPC403,
-   PROCESSOR_PPC405,
-   PROCESSOR_PPC440,
-   PROCESSOR_PPC476,
    PROCESSOR_PPC601,
    PROCESSOR_PPC603,
    PROCESSOR_PPC604,
    PROCESSOR_PPC604e,
    PROCESSOR_PPC620,
    PROCESSOR_PPC630,
+
    PROCESSOR_PPC750,
    PROCESSOR_PPC7400,
    PROCESSOR_PPC7450,
+
+   PROCESSOR_PPC403,
+   PROCESSOR_PPC405,
+   PROCESSOR_PPC440,
+   PROCESSOR_PPC476,
+
    PROCESSOR_PPC8540,
    PROCESSOR_PPC8548,
    PROCESSOR_PPCE300C2,
@@ -53,16 +54,21 @@ enum processor_type
    PROCESSOR_PPCE500MC64,
    PROCESSOR_PPCE5500,
    PROCESSOR_PPCE6500,
+
    PROCESSOR_POWER4,
    PROCESSOR_POWER5,
    PROCESSOR_POWER6,
    PROCESSOR_POWER7,
+   PROCESSOR_POWER8,
+
+   PROCESSOR_RS64A,
+   PROCESSOR_MPCCORE,
    PROCESSOR_CELL,
    PROCESSOR_PPCA2,
-   PROCESSOR_TITAN,
-   PROCESSOR_POWER8
+   PROCESSOR_TITAN
 };
 
+
 /* FP processor type.  */
 enum fpu_type_t
 {
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index d9bcf1a41ed..02836ecea6d 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx);
 extern rtx find_addr_reg (rtx);
 extern rtx gen_easy_altivec_constant (rtx);
 extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_128bit (rtx *);
 extern void rs6000_expand_vector_init (rtx, rtx);
 extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
@@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
 extern int registers_ok_for_quad_peep (rtx, rtx);
 extern int mems_ok_for_quad_peep (rtx, rtx);
 extern bool gpr_or_gpr_p (rtx, rtx);
+extern bool direct_move_p (rtx, rtx);
+extern bool quad_load_store_p (rtx, rtx);
 extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
 							    enum reg_class);
 extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cb6876051d7..55273ab81bd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx);
    don't link in rs6000-c.c, so we can't call it directly.  */
 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
 
+/* Simplfy register classes into simpler classifications.  We assume
+   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
+   check for standard register classes (gpr/floating/altivec/vsx) and
+   floating/vector classes (float/altivec/vsx).  */
+
+enum rs6000_reg_type {
+  NO_REG_TYPE,
+  PSEUDO_REG_TYPE,
+  GPR_REG_TYPE,
+  VSX_REG_TYPE,
+  ALTIVEC_REG_TYPE,
+  FPR_REG_TYPE,
+  SPR_REG_TYPE,
+  CR_REG_TYPE,
+  SPE_ACC_TYPE,
+  SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type.  */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+   purpose, floating point, altivec, and VSX registers).  */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
+
+/* Direct moves to/from vsx/gpr registers that need an additional register to
+   do the move.  */
+static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES];
+static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES];
+static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES];
+
 
 /* Target cpu costs.  */
 
@@ -1042,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *,
 static void rs6000_print_builtin_options (FILE *, int, const char *,
 					  HOST_WIDE_INT);
 
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+					  enum rs6000_reg_type,
+					  enum machine_mode,
+					  secondary_reload_info *,
+					  bool);
+
 /* Hash table stuff for keeping track of TOC entries.  */
 
 struct GTY(()) toc_hash_struct
@@ -1587,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
 	return ALTIVEC_REGNO_P (last_regno);
     }
 
-  /* Allow TImode in all VSX registers if the user asked for it.  Note, PTImode
-     can only go in GPRs.  */
+  /* Allow TImode in all VSX registers if the user asked for it.  */
   if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno))
     return 1;
 
@@ -2154,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
   rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
 
+  /* Precalculate register class to simpler reload register class.  We don't
+     need all of the register classes that are combinations of different
+     classes, just the simple ones that have constraint letters.  */
+  for (c = 0; c < N_REG_CLASSES; c++)
+    reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+  reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+  reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+  reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+  reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+  reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+  reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+  reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+  reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+  reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+  reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+  reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+  reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
+  if (TARGET_VSX)
+    {
+      reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+      reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
+    }
+  else
+    {
+      reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+      reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+    }
+
   /* Precalculate vector information, this must be set up before the
      rs6000_hard_regno_nregs_internal below.  */
   for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2305,7 +2374,15 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   if (TARGET_LFIWZX)
     rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
 
-  /* Set up the reload helper functions.  */
+  /* Setup the direct move combinations.  */
+  for (m = 0; m < NUM_MACHINE_MODES; ++m)
+    {
+      reload_fpr_gpr[m] = CODE_FOR_nothing;
+      reload_gpr_vsx[m] = CODE_FOR_nothing;
+      reload_vsx_gpr[m] = CODE_FOR_nothing;
+    }
+
+  /* Set up the reload helper and direct move functions.  */
   if (TARGET_VSX || TARGET_ALTIVEC)
     {
       if (TARGET_64BIT)
@@ -2329,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	      rs6000_vector_reload[DDmode][0]  = CODE_FOR_reload_dd_di_store;
 	      rs6000_vector_reload[DDmode][1]  = CODE_FOR_reload_dd_di_load;
 	    }
+	  if (TARGET_P8_VECTOR)
+	    {
+	      rs6000_vector_reload[SFmode][0]  = CODE_FOR_reload_sf_di_store;
+	      rs6000_vector_reload[SFmode][1]  = CODE_FOR_reload_sf_di_load;
+	      rs6000_vector_reload[SDmode][0]  = CODE_FOR_reload_sd_di_store;
+	      rs6000_vector_reload[SDmode][1]  = CODE_FOR_reload_sd_di_load;
+	    }
 	  if (TARGET_VSX_TIMODE)
 	    {
 	      rs6000_vector_reload[TImode][0]  = CODE_FOR_reload_ti_di_store;
 	      rs6000_vector_reload[TImode][1]  = CODE_FOR_reload_ti_di_load;
 	    }
+	  if (TARGET_DIRECT_MOVE)
+	    {
+	      if (TARGET_POWERPC64)
+		{
+		  reload_gpr_vsx[TImode]    = CODE_FOR_reload_gpr_from_vsxti;
+		  reload_gpr_vsx[V2DFmode]  = CODE_FOR_reload_gpr_from_vsxv2df;
+		  reload_gpr_vsx[V2DImode]  = CODE_FOR_reload_gpr_from_vsxv2di;
+		  reload_gpr_vsx[V4SFmode]  = CODE_FOR_reload_gpr_from_vsxv4sf;
+		  reload_gpr_vsx[V4SImode]  = CODE_FOR_reload_gpr_from_vsxv4si;
+		  reload_gpr_vsx[V8HImode]  = CODE_FOR_reload_gpr_from_vsxv8hi;
+		  reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi;
+		  reload_gpr_vsx[SFmode]    = CODE_FOR_reload_gpr_from_vsxsf;
+
+		  reload_vsx_gpr[TImode]    = CODE_FOR_reload_vsx_from_gprti;
+		  reload_vsx_gpr[V2DFmode]  = CODE_FOR_reload_vsx_from_gprv2df;
+		  reload_vsx_gpr[V2DImode]  = CODE_FOR_reload_vsx_from_gprv2di;
+		  reload_vsx_gpr[V4SFmode]  = CODE_FOR_reload_vsx_from_gprv4sf;
+		  reload_vsx_gpr[V4SImode]  = CODE_FOR_reload_vsx_from_gprv4si;
+		  reload_vsx_gpr[V8HImode]  = CODE_FOR_reload_vsx_from_gprv8hi;
+		  reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi;
+		  reload_vsx_gpr[SFmode]    = CODE_FOR_reload_vsx_from_gprsf;
+		}
+	      else
+		{
+		  reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi;
+		  reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd;
+		  reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf;
+		}
+	    }
 	}
       else
 	{
@@ -2356,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	      rs6000_vector_reload[DDmode][0]  = CODE_FOR_reload_dd_si_store;
 	      rs6000_vector_reload[DDmode][1]  = CODE_FOR_reload_dd_si_load;
 	    }
+	  if (TARGET_P8_VECTOR)
+	    {
+	      rs6000_vector_reload[SFmode][0]  = CODE_FOR_reload_sf_si_store;
+	      rs6000_vector_reload[SFmode][1]  = CODE_FOR_reload_sf_si_load;
+	      rs6000_vector_reload[SDmode][0]  = CODE_FOR_reload_sd_si_store;
+	      rs6000_vector_reload[SDmode][1]  = CODE_FOR_reload_sd_si_load;
+	    }
 	  if (TARGET_VSX_TIMODE)
 	    {
 	      rs6000_vector_reload[TImode][0]  = CODE_FOR_reload_ti_si_store;
@@ -2916,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
     }
 
+  /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
+     silently turn off quad memory mode.  */
+  if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+    {
+      if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+	warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+      rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+    }
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
 
@@ -3042,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p)
 
   /* Place FP constants in the constant pool instead of TOC
      if section anchors enabled.  */
-  if (flag_section_anchors)
+  if (flag_section_anchors
+      && !global_options_set.x_TARGET_NO_FP_IN_TOC)
     TARGET_NO_FP_IN_TOC = 1;
 
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
@@ -4082,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
       enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
       switch (fn)
 	{
+	case BUILT_IN_CLZIMAX:
+	case BUILT_IN_CLZLL:
+	case BUILT_IN_CLZL:
+	case BUILT_IN_CLZ:
+	  if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+	    {
+	      if (out_mode == QImode && out_n == 16)
+		return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+	      else if (out_mode == HImode && out_n == 8)
+		return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+	      else if (out_mode == SImode && out_n == 4)
+		return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+	      else if (out_mode == DImode && out_n == 2)
+		return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+	    }
+	  break;
 	case BUILT_IN_COPYSIGN:
 	  if (VECTOR_UNIT_VSX_P (V2DFmode)
 	      && out_mode == DFmode && out_n == 2
@@ -4097,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
 	  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
 	    return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
 	  break;
+	case BUILT_IN_POPCOUNTIMAX:
+	case BUILT_IN_POPCOUNTLL:
+	case BUILT_IN_POPCOUNTL:
+	case BUILT_IN_POPCOUNT:
+	  if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+	    {
+	      if (out_mode == QImode && out_n == 16)
+		return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+	      else if (out_mode == HImode && out_n == 8)
+		return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+	      else if (out_mode == SImode && out_n == 4)
+		return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+	      else if (out_mode == DImode && out_n == 2)
+		return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+	    }
+	  break;
 	case BUILT_IN_SQRT:
 	  if (VECTOR_UNIT_VSX_P (V2DFmode)
 	      && out_mode == DFmode && out_n == 2
@@ -4955,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals)
 	{
 	  rtx freg = gen_reg_rtx (V4SFmode);
 	  rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+	  rtx cvt  = ((TARGET_XSCVDPSPN)
+		      ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+		      : gen_vsx_xscvdpsp_scalar (freg, sreg));
 
-	  emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+	  emit_insn (cvt);
 	  emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
 	}
       else
@@ -5339,6 +5505,72 @@ gpr_or_gpr_p (rtx op0, rtx op1)
 	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
 }
 
+/* Return true if this is a move direct operation between GPR registers and
+   floating point/VSX registers.  */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+  int regno0, regno1;
+
+  if (!REG_P (op0) || !REG_P (op1))
+    return false;
+
+  if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+    return false;
+
+  regno0 = REGNO (op0);
+  regno1 = REGNO (op1);
+  if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+    return false;
+
+  if (INT_REGNO_P (regno0))
+    return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+  else if (INT_REGNO_P (regno1))
+    {
+      if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+	return true;
+
+      else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+	return true;
+    }
+
+  return false;
+}
+
+/* Return true if this is a load or store quad operation.  */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+  bool ret;
+
+  if (!TARGET_QUAD_MEMORY)
+    ret = false;
+
+  else if (REG_P (op0) && MEM_P (op1))
+    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+	   && quad_memory_operand (op1, GET_MODE (op1))
+	   && !reg_overlap_mentioned_p (op0, op1));
+
+  else if (MEM_P (op0) && REG_P (op1))
+    ret = (quad_memory_operand (op0, GET_MODE (op0))
+	   && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+  else
+    ret = false;
+
+  if (TARGET_DEBUG_ADDR)
+    {
+      fprintf (stderr, "\n========== quad_load_store, return %s\n",
+	       ret ? "true" : "false");
+      debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+    }
+
+  return ret;
+}
+
 /* Given an address, return a constant offset term if one exists.  */
 
 static rtx
@@ -5474,91 +5706,102 @@ virtual_stack_registers_memory_p (rtx op)
 	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
 }
 
-/* Return true if memory accesses to OP are known to never straddle
-   a 32k boundary.  */
+/* Return true if a MODE sized memory accesses to OP plus OFFSET
+   is known to not straddle a 32k boundary.  */
 
 static bool
 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
 			     enum machine_mode mode)
 {
   tree decl, type;
-  unsigned HOST_WIDE_INT dsize, dalign;
+  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
 
   if (GET_CODE (op) != SYMBOL_REF)
     return false;
 
+  dsize = GET_MODE_SIZE (mode);
   decl = SYMBOL_REF_DECL (op);
   if (!decl)
     {
-      if (GET_MODE_SIZE (mode) == 0)
+      if (dsize == 0)
 	return false;
 
       /* -fsection-anchors loses the original SYMBOL_REF_DECL when
 	 replacing memory addresses with an anchor plus offset.  We
 	 could find the decl by rummaging around in the block->objects
 	 VEC for the given offset but that seems like too much work.  */
-      dalign = 1;
+      dalign = BITS_PER_UNIT;
       if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
 	  && SYMBOL_REF_ANCHOR_P (op)
 	  && SYMBOL_REF_BLOCK (op) != NULL)
 	{
 	  struct object_block *block = SYMBOL_REF_BLOCK (op);
-	  HOST_WIDE_INT lsb, mask;
 
-	  /* Given the alignment of the block..  */
 	  dalign = block->alignment;
-	  mask = dalign / BITS_PER_UNIT - 1;
-
-	  /* ..and the combined offset of the anchor and any offset
-	     to this block object..  */
 	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
-	  lsb = offset & -offset;
+	}
+      else if (CONSTANT_POOL_ADDRESS_P (op))
+	{
+	  /* It would be nice to have get_pool_align()..  */
+	  enum machine_mode cmode = get_pool_mode (op);
 
-	  /* ..find how many bits of the alignment we know for the
-	     object.  */
-	  mask &= lsb - 1;
-	  dalign = mask + 1;
+	  dalign = GET_MODE_ALIGNMENT (cmode);
 	}
-      return dalign >= GET_MODE_SIZE (mode);
     }
-
-  if (DECL_P (decl))
+  else if (DECL_P (decl))
     {
-      if (TREE_CODE (decl) == FUNCTION_DECL)
-	return true;
+      dalign = DECL_ALIGN (decl);
 
-      if (!DECL_SIZE_UNIT (decl))
-	return false;
+      if (dsize == 0)
+	{
+	  /* Allow BLKmode when the entire object is known to not
+	     cross a 32k boundary.  */
+	  if (!DECL_SIZE_UNIT (decl))
+	    return false;
 
-      if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
-	return false;
+	  if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
+	    return false;
 
-      dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
-      if (dsize > 32768)
-	return false;
+	  dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+	  if (dsize > 32768)
+	    return false;
 
-      dalign = DECL_ALIGN_UNIT (decl);
-      return dalign >= dsize;
+	  return dalign / BITS_PER_UNIT >= dsize;
+	}
     }
+  else
+    {
+      type = TREE_TYPE (decl);
 
-  type = TREE_TYPE (decl);
+      dalign = TYPE_ALIGN (type);
+      if (CONSTANT_CLASS_P (decl))
+	dalign = CONSTANT_ALIGNMENT (decl, dalign);
+      else
+	dalign = DATA_ALIGNMENT (decl, dalign);
 
-  if (TREE_CODE (decl) == STRING_CST)
-    dsize = TREE_STRING_LENGTH (decl);
-  else if (TYPE_SIZE_UNIT (type)
-	   && host_integerp (TYPE_SIZE_UNIT (type), 1))
-    dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
-  else
-    return false;
-  if (dsize > 32768)
-    return false;
+      if (dsize == 0)
+	{
+	  /* BLKmode, check the entire object.  */
+	  if (TREE_CODE (decl) == STRING_CST)
+	    dsize = TREE_STRING_LENGTH (decl);
+	  else if (TYPE_SIZE_UNIT (type)
+		   && host_integerp (TYPE_SIZE_UNIT (type), 1))
+	    dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+	  else
+	    return false;
+	  if (dsize > 32768)
+	    return false;
+
+	  return dalign / BITS_PER_UNIT >= dsize;
+	}
+    }
+
+  /* Find how many bits of the alignment we know for this access.  */
+  mask = dalign / BITS_PER_UNIT - 1;
+  lsb = offset & -offset;
+  mask &= lsb - 1;
+  dalign = mask + 1;
 
-  dalign = TYPE_ALIGN (type);
-  if (CONSTANT_CLASS_P (decl))
-    dalign = CONSTANT_ALIGNMENT (decl, dalign);
-  else
-    dalign = DATA_ALIGNMENT (decl, dalign);
-  dalign /= BITS_PER_UNIT;
   return dalign >= dsize;
 }
 
@@ -5846,8 +6089,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
       if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
 	return force_reg (Pmode, XEXP (x, 0));
 
+      /* For TImode with load/store quad, restrict addresses to just a single
+	 pointer, so it works with both GPRs and VSX registers.  */
       /* Make sure both operands are registers.  */
-      else if (GET_CODE (x) == PLUS)
+      else if (GET_CODE (x) == PLUS
+	       && (mode != TImode || !TARGET_QUAD_MEMORY))
 	return gen_rtx_PLUS (Pmode,
 			     force_reg (Pmode, XEXP (x, 0)),
 			     force_reg (Pmode, XEXP (x, 1)));
@@ -6504,7 +6750,6 @@ use_toc_relative_ref (rtx sym)
 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
 					       get_pool_mode (sym)))
 	  || (TARGET_CMODEL == CMODEL_MEDIUM
-	      && !CONSTANT_POOL_ADDRESS_P (sym)
 	      && SYMBOL_REF_LOCAL_P (sym)));
 }
 
@@ -6802,6 +7047,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
   if (reg_offset_p
       && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
     return 1;
+  /* For TImode, if we have load/store quad, only allow register indirect
+     addresses.  This will allow the values to go in either GPRs or VSX
+     registers without reloading.  The vector types would tend to go into VSX
+     registers, so we allow REG+REG, while TImode seems somewhat split, in that
+     some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY)
+    return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
       && reg_offset_p
@@ -9314,20 +9566,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
   if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
       && cfun->va_list_gpr_size)
     {
-      int nregs = GP_ARG_NUM_REG - first_reg_offset;
+      int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
 
       if (va_list_gpr_counter_field)
-	{
-	  /* V4 va_list_gpr_size counts number of registers needed.  */
-	  if (nregs > cfun->va_list_gpr_size)
-	    nregs = cfun->va_list_gpr_size;
-	}
+	/* V4 va_list_gpr_size counts number of registers needed.  */
+	n_gpr = cfun->va_list_gpr_size;
       else
-	{
-	  /* char * va_list instead counts number of bytes needed.  */
-	  if (nregs > cfun->va_list_gpr_size / reg_size)
-	    nregs = cfun->va_list_gpr_size / reg_size;
-	}
+	/* char * va_list instead counts number of bytes needed.  */
+	n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
+
+      if (nregs > n_gpr)
+	nregs = n_gpr;
 
       mem = gen_rtx_MEM (BLKmode,
 			 plus_constant (Pmode, save_area,
@@ -12857,6 +13106,7 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
     {
       /* unsigned 1 argument functions.  */
     case CRYPTO_BUILTIN_VSBOX:
+    case P8V_BUILTIN_VGBBD:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       break;
@@ -13947,29 +14197,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
   return NULL_TREE;
 }
 
-enum reload_reg_type {
-  GPR_REGISTER_TYPE,
-  VECTOR_REGISTER_TYPE,
-  OTHER_REGISTER_TYPE
-};
+/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
+   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+   only work on the traditional altivec registers, note if an altivec register
+   was choosen.  */
 
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
 {
-  switch (rclass)
+  HOST_WIDE_INT regno;
+  enum reg_class rclass;
+
+  if (GET_CODE (reg) == SUBREG)
+    reg = SUBREG_REG (reg);
+
+  if (!REG_P (reg))
+    return NO_REG_TYPE;
+
+  regno = REGNO (reg);
+  if (regno >= FIRST_PSEUDO_REGISTER)
     {
-    case GENERAL_REGS:
-    case BASE_REGS:
-      return GPR_REGISTER_TYPE;
+      if (!lra_in_progress && !reload_in_progress && !reload_completed)
+	return PSEUDO_REG_TYPE;
 
-    case FLOAT_REGS:
-    case ALTIVEC_REGS:
-    case VSX_REGS:
-      return VECTOR_REGISTER_TYPE;
+      regno = true_regnum (reg);
+      if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+	return PSEUDO_REG_TYPE;
+    }	
 
-    default:
-      return OTHER_REGISTER_TYPE;
+  gcc_assert (regno >= 0);
+
+  if (is_altivec && ALTIVEC_REGNO_P (regno))
+    *is_altivec = true;
+
+  rclass = rs6000_regno_regclass[regno];
+  return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+   different register classe is really a simple move.  */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+				     enum rs6000_reg_type from_type,
+				     enum machine_mode mode)
+{
+  int size;
+
+  /* Add support for various direct moves available.  In this function, we only
+     look at cases where we don't need any extra registers, and one or more
+     simple move insns are issued.  At present, 32-bit integers are not allowed
+     in FPR/VSX registers.  Single precision binary floating is not a simple
+     move because we need to convert to the single precision memory layout.
+     The 4-byte SDmode can be moved.  */
+  size = GET_MODE_SIZE (mode);
+  if (TARGET_DIRECT_MOVE
+      && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+    return true;
+
+  else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+	   && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+	       || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+    return true;
+
+  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+    return true;
+
+  return false;
+}
+
+/* Power8 helper function for rs6000_secondary_reload, handle all of the
+   special direct moves that involve allocating an extra register, return the
+   insn code of the helper function if there is such a function or
+   CODE_FOR_nothing if not.  */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+				     enum rs6000_reg_type from_type,
+				     enum machine_mode mode,
+				     secondary_reload_info *sri,
+				     bool altivec_p)
+{
+  bool ret = false;
+  enum insn_code icode = CODE_FOR_nothing;
+  int cost = 0;
+  int size = GET_MODE_SIZE (mode);
+
+  if (TARGET_POWERPC64)
+    {
+      if (size == 16)
+	{
+	  /* Handle moving 128-bit values from GPRs to VSX point registers on
+	     power8 when running in 64-bit mode using XXPERMDI to glue the two
+	     64-bit values back together.  */
+	  if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+	    {
+	      cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
+	      icode = reload_vsx_gpr[(int)mode];
+	    }
+
+	  /* Handle moving 128-bit values from VSX point registers to GPRs on
+	     power8 when running in 64-bit mode using XXPERMDI to get access to the
+	     bottom 64-bit value.  */
+	  else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+	    {
+	      cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
+	      icode = reload_gpr_vsx[(int)mode];
+	    }
+	}
+
+      else if (mode == SFmode)
+	{
+	  if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+	    {
+	      cost = 3;			/* xscvdpspn, mfvsrd, and.  */
+	      icode = reload_gpr_vsx[(int)mode];
+	    }
+
+	  else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+	    {
+	      cost = 2;			/* mtvsrz, xscvspdpn.  */
+	      icode = reload_vsx_gpr[(int)mode];
+	    }
+	}
     }
+
+  if (TARGET_POWERPC64 && size == 16)
+    {
+      /* Handle moving 128-bit values from GPRs to VSX point registers on
+	 power8 when running in 64-bit mode using XXPERMDI to glue the two
+	 64-bit values back together.  */
+      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+	{
+	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
+	  icode = reload_vsx_gpr[(int)mode];
+	}
+
+      /* Handle moving 128-bit values from VSX point registers to GPRs on
+	 power8 when running in 64-bit mode using XXPERMDI to get access to the
+	 bottom 64-bit value.  */
+      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+	{
+	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
+	  icode = reload_gpr_vsx[(int)mode];
+	}
+    }
+
+  else if (!TARGET_POWERPC64 && size == 8)
+    {
+      /* Handle moving 64-bit values from GPRs to floating point registers on
+	 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+	 values back together.  Altivec register classes must be handled
+	 specially since a different instruction is used, and the secondary
+	 reload support requires a single instruction class in the scratch
+	 register constraint.  However, right now TFmode is not allowed in
+	 Altivec registers, so the pattern will never match.  */
+      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+	{
+	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
+	  icode = reload_fpr_gpr[(int)mode];
+	}
+    }
+
+  if (icode != CODE_FOR_nothing)
+    {
+      ret = true;
+      if (sri)
+	{
+	  sri->icode = icode;
+	  sri->extra_cost = cost;
+	}
+    }
+
+  return ret;
+}
+
+/* Return whether a move between two register classes can be done either
+   directly (simple move) or via a pattern that uses a single extra temporary
+   (using power8's direct move in this case.  */
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+			      enum rs6000_reg_type from_type,
+			      enum machine_mode mode,
+			      secondary_reload_info *sri,
+			      bool altivec_p)
+{
+  /* Fall back to load/store reloads if either type is not a register.  */
+  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+    return false;
+
+  /* If we haven't allocated registers yet, assume the move can be done for the
+     standard register types.  */
+  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+    return true;
+
+  /* Moves to the same set of registers is a simple move for non-specialized
+     registers.  */
+  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+    return true;
+
+  /* Check whether a simple move can be done directly.  */
+  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+    {
+      if (sri)
+	{
+	  sri->icode = CODE_FOR_nothing;
+	  sri->extra_cost = 0;
+	}
+      return true;
+    }
+
+  /* Now check if we can do it in a few steps.  */
+  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+					      altivec_p);
 }
 
 /* Inform reload about cases where moving X with a mode MODE to a register in
@@ -13995,11 +14442,32 @@ rs6000_secondary_reload (bool in_p,
   bool default_p = false;
 
   sri->icode = CODE_FOR_nothing;
-
-  /* Convert vector loads and stores into gprs to use an additional base
-     register.  */
   icode = rs6000_vector_reload[mode][in_p != false];
-  if (icode != CODE_FOR_nothing)
+
+  if (REG_P (x) || register_operand (x, mode))
+    {
+      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+      bool altivec_p = (rclass == ALTIVEC_REGS);
+      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+      if (!in_p)
+	{
+	  enum rs6000_reg_type exchange = to_type;
+	  to_type = from_type;
+	  from_type = exchange;
+	}
+
+      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+					altivec_p))
+	{
+	  icode = (enum insn_code)sri->icode;
+	  default_p = false;
+	  ret = NO_REGS;
+	}
+    }
+
+  /* Handle vector moves with reload helper functions.  */
+  if (ret == ALL_REGS && icode != CODE_FOR_nothing)
     {
       ret = NO_REGS;
       sri->icode = CODE_FOR_nothing;
@@ -14011,12 +14479,21 @@ rs6000_secondary_reload (bool in_p,
 
 	  /* Loads to and stores from gprs can do reg+offset, and wouldn't need
 	     an extra register in that case, but it would need an extra
-	     register if the addressing is reg+reg or (reg+reg)&(-16).  */
+	     register if the addressing is reg+reg or (reg+reg)&(-16).  Special
+	     case load/store quad.  */
 	  if (rclass == GENERAL_REGS || rclass == BASE_REGS)
 	    {
-	      if (!legitimate_indirect_address_p (addr, false)
-		  && !rs6000_legitimate_offset_address_p (PTImode, addr,
-							  false, true))
+	      if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+		  && GET_MODE_SIZE (mode) == 16
+		  && quad_memory_operand (x, mode))
+		{
+		  sri->icode = icode;
+		  sri->extra_cost = 2;
+		}
+
+	      else if (!legitimate_indirect_address_p (addr, false)
+		       && !rs6000_legitimate_offset_address_p (PTImode, addr,
+							       false, true))
 		{
 		  sri->icode = icode;
 		  /* account for splitting the loads, and converting the
@@ -14030,7 +14507,7 @@ rs6000_secondary_reload (bool in_p,
          else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
                   && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                   && (legitimate_indirect_address_p (addr, false)
-                      || legitimate_indirect_address_p (XEXP (addr, 0), false)
+                      || legitimate_indirect_address_p (addr, false)
                       || rs6000_legitimate_offset_address_p (mode, addr,
                                                              false, true)))
 
@@ -14082,12 +14559,12 @@ rs6000_secondary_reload (bool in_p,
 	  else
 	    {
 	      enum reg_class xclass = REGNO_REG_CLASS (regno);
-	      enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
-	      enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+	      enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+	      enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
 
 	      /* If memory is needed, use default_secondary_reload to create the
 		 stack slot.  */
-	      if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+	      if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
 		default_p = true;
 	      else
 		ret = NO_REGS;
@@ -14097,7 +14574,7 @@ rs6000_secondary_reload (bool in_p,
 	default_p = true;
     }
   else if (TARGET_POWERPC64
-	   && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+	   && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
 	   && MEM_P (x)
 	   && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
     {
@@ -14136,7 +14613,7 @@ rs6000_secondary_reload (bool in_p,
 	default_p = true;
     }
   else if (!TARGET_POWERPC64
-	   && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+	   && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
 	   && MEM_P (x)
 	   && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
     {
@@ -14699,42 +15176,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
    set and vice versa.  */
 
 static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
-				enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+				enum reg_class to_class,
 				enum machine_mode mode)
 {
-  if (class1 == class2)
-    return false;
-
-  /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
-     ALTIVEC_REGS, and FLOAT_REGS).  We don't need to use memory to copy
-     between these classes.  But we need memory for other things that can go in
-     FLOAT_REGS like SFmode.  */
-  if (TARGET_VSX
-      && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
-      && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
-	  || class1 == FLOAT_REGS))
-    return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
-	    && class2 != FLOAT_REGS);
+  enum rs6000_reg_type from_type, to_type;
+  bool altivec_p = ((from_class == ALTIVEC_REGS)
+		    || (to_class == ALTIVEC_REGS));
 
-  if (class1 == VSX_REGS || class2 == VSX_REGS)
-    return true;
-
-  if (class1 == FLOAT_REGS
-      && (!TARGET_MFPGPR || !TARGET_POWERPC64
-	  || ((mode != DFmode)
-	      && (mode != DDmode)
-	      && (mode != DImode))))
-    return true;
+  /* If a simple/direct move is available, we don't need secondary memory  */
+  from_type = reg_class_to_reg_type[(int)from_class];
+  to_type = reg_class_to_reg_type[(int)to_class];
 
-  if (class2 == FLOAT_REGS
-      && (!TARGET_MFPGPR || !TARGET_POWERPC64
-	  || ((mode != DFmode)
-	      && (mode != DDmode)
-	      && (mode != DImode))))
-    return true;
+  if (rs6000_secondary_reload_move (to_type, from_type, mode,
+				    (secondary_reload_info *)0, altivec_p))
+    return false;
 
-  if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+  /* If we have a floating point or vector register class, we need to use
+     memory to transfer the data.  */
+  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
     return true;
 
   return false;
@@ -14742,17 +15202,19 @@ rs6000_secondary_memory_needed (enum reg_class class1,
 
 /* Debug version of rs6000_secondary_memory_needed.  */
 static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
-				      enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+				      enum reg_class to_class,
 				      enum machine_mode mode)
 {
-  bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+  bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
 
   fprintf (stderr,
-	   "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
-	   "class2 = %s, mode = %s\n",
-	   ret ? "true" : "false", reg_class_names[class1],
-	   reg_class_names[class2], GET_MODE_NAME (mode));
+	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+	   "to_class = %s, mode = %s\n",
+	   ret ? "true" : "false",
+	   reg_class_names[from_class],
+	   reg_class_names[to_class],
+	   GET_MODE_NAME (mode));
 
   return ret;
 }
@@ -14958,6 +15420,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from,
   return ret;
 }
 
+/* Return a string to do a move operation of 128 bits of data.  */
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  enum machine_mode mode = GET_MODE (dest);
+  int dest_regno;
+  int src_regno;
+  bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p;
+  bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p;
+
+  if (REG_P (dest))
+    {
+      dest_regno = REGNO (dest);
+      dest_gpr_p = INT_REGNO_P (dest_regno);
+      dest_fp_p = FP_REGNO_P (dest_regno);
+      dest_av_p = ALTIVEC_REGNO_P (dest_regno);
+      dest_vsx_p = dest_fp_p | dest_av_p;
+    }
+  else
+    {
+      dest_regno = -1;
+      dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false;
+    }
+
+  if (REG_P (src))
+    {
+      src_regno = REGNO (src);
+      src_gpr_p = INT_REGNO_P (src_regno);
+      src_fp_p = FP_REGNO_P (src_regno);
+      src_av_p = ALTIVEC_REGNO_P (src_regno);
+      src_vsx_p = src_fp_p | src_av_p;
+    }
+  else
+    {
+      src_regno = -1;
+      src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false;
+    }
+
+  /* Register moves.  */
+  if (dest_regno >= 0 && src_regno >= 0)
+    {
+      if (dest_gpr_p)
+	{
+	  if (src_gpr_p)
+	    return "#";
+
+	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+	    return "#";
+	}
+
+      else if (TARGET_VSX && dest_vsx_p)
+	{
+	  if (src_vsx_p)
+	    return "xxlor %x0,%x1,%x1";
+
+	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
+	    return "#";
+	}
+
+      else if (TARGET_ALTIVEC && dest_av_p && src_av_p)
+	return "vor %0,%1,%1";
+
+      else if (dest_fp_p && src_fp_p)
+	return "#";
+    }
+
+  /* Loads.  */
+  else if (dest_regno >= 0 && MEM_P (src))
+    {
+      if (dest_gpr_p)
+	{
+	  if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
+	      && quad_memory_operand (src, mode)
+	      && !reg_overlap_mentioned_p (dest, src))
+	    {
+	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
+	      return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+	    }
+	  else
+	    return "#";
+	}
+
+      else if (TARGET_ALTIVEC && dest_av_p
+	       && altivec_indexed_or_indirect_operand (src, mode))
+	return "lvx %0,%y1";
+
+      else if (TARGET_VSX && dest_vsx_p)
+	{
+	  if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+	    return "lxvw4x %x0,%y1";
+	  else
+	    return "lxvd2x %x0,%y1";
+	}
+
+      else if (TARGET_ALTIVEC && dest_av_p)
+	return "lvx %0,%y1";
+
+      else if (dest_fp_p)
+	return "#";
+    }
+
+  /* Stores.  */
+  else if (src_regno >= 0 && MEM_P (dest))
+    {
+      if (src_gpr_p)
+	{
+	  if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
+	      && quad_memory_operand (dest, mode))
+	    {
+	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
+	      return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+	    }
+	  else
+	    return "#";
+	}
+
+      else if (TARGET_ALTIVEC && src_av_p
+	       && altivec_indexed_or_indirect_operand (src, mode))
+	return "stvx %1,%y0";
+
+      else if (TARGET_VSX && src_vsx_p)
+	{
+	  if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+	    return "stxvw4x %x1,%y0";
+	  else
+	    return "stxvd2x %x1,%y0";
+	}
+
+      else if (TARGET_ALTIVEC && src_av_p)
+	return "stvx %1,%y0";
+
+      else if (src_fp_p)
+	return "#";
+    }
+
+  /* Constants.  */
+  else if (dest_regno >= 0
+	   && (GET_CODE (src) == CONST_INT
+	       || GET_CODE (src) == CONST_DOUBLE
+	       || GET_CODE (src) == CONST_VECTOR))
+    {
+      if (dest_gpr_p)
+	return "#";
+
+      else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+	return "xxlxor %x0,%x0,%x0";
+
+      else if (TARGET_ALTIVEC && dest_av_p)
+	return output_vec_const_move (operands);
+    }
+
+  if (TARGET_DEBUG_ADDR)
+    {
+      fprintf (stderr, "\n===== Bad 128 bit move:\n");
+      debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+    }
+
+  gcc_unreachable ();
+}
+
+
 /* Given a comparison operation, return the bit number in CCR to test.  We
    know this is a valid comparison.
 
@@ -15674,11 +16300,6 @@ print_operand (FILE *file, rtx x, int code)
 	 TOCs and the like.  */
       gcc_assert (GET_CODE (x) == SYMBOL_REF);
 
-      /* Mark the decl as referenced so that cgraph will output the
-	 function.  */
-      if (SYMBOL_REF_DECL (x))
-	mark_decl_referenced (SYMBOL_REF_DECL (x));
-
       /* For macho, check to see if we need a stub.  */
       if (TARGET_MACHO)
 	{
@@ -16087,16 +16708,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
     {
       rtx cmp, or_result, compare_result2;
       enum machine_mode op_mode = GET_MODE (op0);
+      bool reverse_p;
 
       if (op_mode == VOIDmode)
 	op_mode = GET_MODE (op1);
 
+      /* First reverse the condition codes that aren't directly supported.  */
+      switch (code)
+	{
+	  case NE:
+	  case UNLT:
+	  case UNLE:
+	  case UNGT:
+	  case UNGE:
+	    code = reverse_condition_maybe_unordered (code);
+	    reverse_p = true;
+	    break;
+
+	  case EQ:
+	  case LT:
+	  case LE:
+	  case GT:
+	  case GE:
+	    reverse_p = false;
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	}
+
       /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
 	 This explains the following mess.  */
 
       switch (code)
 	{
-	case EQ: case UNEQ: case NE: case LTGT:
+	case EQ:
 	  switch (op_mode)
 	    {
 	    case SFmode:
@@ -16122,7 +16768,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
 	    }
 	  break;
 
-	case GT: case GTU: case UNGT: case UNGE: case GE: case GEU:
+	case GT:
+	case GE:
 	  switch (op_mode)
 	    {
 	    case SFmode:
@@ -16148,7 +16795,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
 	    }
 	  break;
 
-	case LT: case LTU: case UNLT: case UNLE: case LE: case LEU:
+	case LT: 
+	case LE:
 	  switch (op_mode)
 	    {
 	    case SFmode:
@@ -16173,24 +16821,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
 	      gcc_unreachable ();
 	    }
 	  break;
+
         default:
           gcc_unreachable ();
 	}
 
       /* Synthesize LE and GE from LT/GT || EQ.  */
-      if (code == LE || code == GE || code == LEU || code == GEU)
+      if (code == LE || code == GE)
 	{
 	  emit_insn (cmp);
 
-	  switch (code)
-	    {
-	    case LE: code = LT; break;
-	    case GE: code = GT; break;
-	    case LEU: code = LT; break;
-	    case GEU: code = GT; break;
-	    default: gcc_unreachable ();
-	    }
-
 	  compare_result2 = gen_reg_rtx (CCFPmode);
 
 	  /* Do the EQ.  */
@@ -16217,23 +16857,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
 	    default:
 	      gcc_unreachable ();
 	    }
+
 	  emit_insn (cmp);
 
 	  /* OR them together.  */
 	  or_result = gen_reg_rtx (CCFPmode);
 	  cmp = gen_e500_cr_ior_compare (or_result, compare_result,
-					   compare_result2);
+					 compare_result2);
 	  compare_result = or_result;
-	  code = EQ;
-	}
-      else
-	{
-	  if (code == NE || code == LTGT)
-	    code = NE;
-	  else
-	    code = EQ;
 	}
 
+      code = reverse_p ? NE : EQ;
+
       emit_insn (cmp);
     }
   else
@@ -17113,7 +17748,8 @@ emit_unlikely_jump (rtx cond, rtx label)
 }
 
 /* A subroutine of the atomic operation splitters.  Emit a load-locked
-   instruction in MODE.  */
+   instruction in MODE.  For QI/HImode, possibly use a pattern than includes
+   the zero_extend operation.  */
 
 static void
 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
@@ -17122,12 +17758,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
 
   switch (mode)
     {
+    case QImode:
+      fn = gen_load_lockedqi;
+      break;
+    case HImode:
+      fn = gen_load_lockedhi;
+      break;
     case SImode:
-      fn = gen_load_lockedsi;
+      if (GET_MODE (mem) == QImode)
+	fn = gen_load_lockedqi_si;
+      else if (GET_MODE (mem) == HImode)
+	fn = gen_load_lockedhi_si;
+      else
+	fn = gen_load_lockedsi;
       break;
     case DImode:
       fn = gen_load_lockeddi;
       break;
+    case TImode:
+      fn = gen_load_lockedti;
+      break;
     default:
       gcc_unreachable ();
     }
@@ -17144,12 +17794,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
 
   switch (mode)
     {
+    case QImode:
+      fn = gen_store_conditionalqi;
+      break;
+    case HImode:
+      fn = gen_store_conditionalhi;
+      break;
     case SImode:
       fn = gen_store_conditionalsi;
       break;
     case DImode:
       fn = gen_store_conditionaldi;
       break;
+    case TImode:
+      fn = gen_store_conditionalti;
+      break;
     default:
       gcc_unreachable ();
     }
@@ -17246,8 +17905,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
   shift = gen_reg_rtx (SImode);
   addr = gen_lowpart (SImode, addr);
   emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
-  shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
-			       shift, 1, OPTAB_LIB_WIDEN);
+  if (WORDS_BIG_ENDIAN)
+    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
+			         shift, 1, OPTAB_LIB_WIDEN);
   *pshift = shift;
 
   /* Mask for insertion.  */
@@ -17295,7 +17955,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
 {
   rtx boolval, retval, mem, oldval, newval, cond;
   rtx label1, label2, x, mask, shift;
-  enum machine_mode mode;
+  enum machine_mode mode, orig_mode;
   enum memmodel mod_s, mod_f;
   bool is_weak;
 
@@ -17307,22 +17967,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
   is_weak = (INTVAL (operands[5]) != 0);
   mod_s = (enum memmodel) INTVAL (operands[6]);
   mod_f = (enum memmodel) INTVAL (operands[7]);
-  mode = GET_MODE (mem);
+  orig_mode = mode = GET_MODE (mem);
 
   mask = shift = NULL_RTX;
   if (mode == QImode || mode == HImode)
     {
-      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
-      /* Shift and mask OLDVAL into position with the word.  */
+      /* Before power8, we didn't have access to lbarx/lharx, so generate a
+	 lwarx and shift/mask operations.  With power8, we need to do the
+	 comparison in SImode, but the store is still done in QI/HImode.  */
       oldval = convert_modes (SImode, mode, oldval, 1);
-      oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
-				    NULL_RTX, 1, OPTAB_LIB_WIDEN);
 
-      /* Shift and mask NEWVAL into position within the word.  */
-      newval = convert_modes (SImode, mode, newval, 1);
-      newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
-				    NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      if (!TARGET_SYNC_HI_QI)
+	{
+	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
+
+	  /* Shift and mask OLDVAL into position with the word.  */
+	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
+					NULL_RTX, 1, OPTAB_LIB_WIDEN);
+
+	  /* Shift and mask NEWVAL into position within the word.  */
+	  newval = convert_modes (SImode, mode, newval, 1);
+	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
+					NULL_RTX, 1, OPTAB_LIB_WIDEN);
+	}
 
       /* Prepare to adjust the return value.  */
       retval = gen_reg_rtx (SImode);
@@ -17351,7 +18018,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
     }
 
   cond = gen_reg_rtx (CCmode);
-  x = gen_rtx_COMPARE (CCmode, x, oldval);
+  /* If we have TImode, synthesize a comparison.  */
+  if (mode != TImode)
+    x = gen_rtx_COMPARE (CCmode, x, oldval);
+  else
+    {
+      rtx xor1_result = gen_reg_rtx (DImode);
+      rtx xor2_result = gen_reg_rtx (DImode);
+      rtx or_result = gen_reg_rtx (DImode);
+      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+    }
+
   emit_insn (gen_rtx_SET (VOIDmode, cond, x));
 
   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
@@ -17361,7 +18046,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
   if (mask)
     x = rs6000_mask_atomic_subword (retval, newval, mask);
 
-  emit_store_conditional (mode, cond, mem, x);
+  emit_store_conditional (orig_mode, cond, mem, x);
 
   if (!is_weak)
     {
@@ -17379,6 +18064,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
 
   if (shift)
     rs6000_finish_atomic_subword (operands[1], retval, shift);
+  else if (mode != GET_MODE (operands[1]))
+    convert_move (operands[1], retval, 1);
 
   /* In all cases, CR0 contains EQ on success, and NE on failure.  */
   x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -17402,7 +18089,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
   mode = GET_MODE (mem);
 
   mask = shift = NULL_RTX;
-  if (mode == QImode || mode == HImode)
+  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
     {
       mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
 
@@ -17451,53 +18138,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
 {
   enum memmodel model = (enum memmodel) INTVAL (model_rtx);
   enum machine_mode mode = GET_MODE (mem);
+  enum machine_mode store_mode = mode;
   rtx label, x, cond, mask, shift;
   rtx before = orig_before, after = orig_after;
 
   mask = shift = NULL_RTX;
+  /* On power8, we want to use SImode for the operation.  On previous systems,
+     use the operation in a subword and shift/mask to get the proper byte or
+     halfword.  */
   if (mode == QImode || mode == HImode)
     {
-      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
-      /* Shift and mask VAL into position with the word.  */
-      val = convert_modes (SImode, mode, val, 1);
-      val = expand_simple_binop (SImode, ASHIFT, val, shift,
-				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      if (TARGET_SYNC_HI_QI)
+	{
+	  val = convert_modes (SImode, mode, val, 1);
 
-      switch (code)
+	  /* Prepare to adjust the return value.  */
+	  before = gen_reg_rtx (SImode);
+	  if (after)
+	    after = gen_reg_rtx (SImode);
+	  mode = SImode;
+	}
+      else
 	{
-	case IOR:
-	case XOR:
-	  /* We've already zero-extended VAL.  That is sufficient to
-	     make certain that it does not affect other bits.  */
-	  mask = NULL;
-	  break;
+	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
 
-	case AND:
-	  /* If we make certain that all of the other bits in VAL are
-	     set, that will be sufficient to not affect other bits.  */
-	  x = gen_rtx_NOT (SImode, mask);
-	  x = gen_rtx_IOR (SImode, x, val);
-	  emit_insn (gen_rtx_SET (VOIDmode, val, x));
-	  mask = NULL;
-	  break;
+	  /* Shift and mask VAL into position with the word.  */
+	  val = convert_modes (SImode, mode, val, 1);
+	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
+				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
 
-	case NOT:
-	case PLUS:
-	case MINUS:
-	  /* These will all affect bits outside the field and need
-	     adjustment via MASK within the loop.  */
-	  break;
+	  switch (code)
+	    {
+	    case IOR:
+	    case XOR:
+	      /* We've already zero-extended VAL.  That is sufficient to
+		 make certain that it does not affect other bits.  */
+	      mask = NULL;
+	      break;
 
-	default:
-	  gcc_unreachable ();
-	}
+	    case AND:
+	      /* If we make certain that all of the other bits in VAL are
+		 set, that will be sufficient to not affect other bits.  */
+	      x = gen_rtx_NOT (SImode, mask);
+	      x = gen_rtx_IOR (SImode, x, val);
+	      emit_insn (gen_rtx_SET (VOIDmode, val, x));
+	      mask = NULL;
+	      break;
 
-      /* Prepare to adjust the return value.  */
-      before = gen_reg_rtx (SImode);
-      if (after)
-	after = gen_reg_rtx (SImode);
-      mode = SImode;
+	    case NOT:
+	    case PLUS:
+	    case MINUS:
+	      /* These will all affect bits outside the field and need
+		 adjustment via MASK within the loop.  */
+	      break;
+
+	    default:
+	      gcc_unreachable ();
+	    }
+
+	  /* Prepare to adjust the return value.  */
+	  before = gen_reg_rtx (SImode);
+	  if (after)
+	    after = gen_reg_rtx (SImode);
+	  store_mode = mode = SImode;
+	}
     }
 
   mem = rs6000_pre_atomic_barrier (mem, model);
@@ -17530,9 +18234,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
       x = rs6000_mask_atomic_subword (before, x, mask);
     }
+  else if (store_mode != mode)
+    x = convert_modes (store_mode, mode, x, 1);
 
   cond = gen_reg_rtx (CCmode);
-  emit_store_conditional (mode, cond, mem, x);
+  emit_store_conditional (store_mode, cond, mem, x);
 
   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
   emit_unlikely_jump (x, label);
@@ -17541,11 +18247,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
 
   if (shift)
     {
+      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+	 then do the calcuations in a SImode register.  */
       if (orig_before)
 	rs6000_finish_atomic_subword (orig_before, before, shift);
       if (orig_after)
 	rs6000_finish_atomic_subword (orig_after, after, shift);
     }
+  else if (store_mode != mode)
+    {
+      /* QImode/HImode on machines with lbarx/lharx where we do the native
+	 operation and then do the calcuations in a SImode register.  */
+      if (orig_before)
+	convert_move (orig_before, before, 1);
+      if (orig_after)
+	convert_move (orig_after, after, 1);
+    }
   else if (orig_after && after != orig_after)
     emit_move_insn (orig_after, after);
 }
@@ -22560,7 +23277,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
 	    fputs (DOUBLE_INT_ASM_OP, file);
 	  else
 	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
-	  fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+	  if (WORDS_BIG_ENDIAN)
+	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+	  else
+	    fprintf (file, "0x%lx\n", l & 0xffffffff);
 	  return;
 	}
       else
@@ -27197,26 +27917,31 @@ bool
 altivec_expand_vec_perm_const (rtx operands[4])
 {
   struct altivec_perm_insn {
+    HOST_WIDE_INT mask;
     enum insn_code impl;
     unsigned char perm[16];
   };
   static const struct altivec_perm_insn patterns[] = {
-    { CODE_FOR_altivec_vpkuhum,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
       {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
-    { CODE_FOR_altivec_vpkuwum,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
       {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
-    { CODE_FOR_altivec_vmrghb,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb,
       {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
-    { CODE_FOR_altivec_vmrghh,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh,
       {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
-    { CODE_FOR_altivec_vmrghw,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw,
       {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
-    { CODE_FOR_altivec_vmrglb,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb,
       {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
-    { CODE_FOR_altivec_vmrglh,
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh,
       {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
-    { CODE_FOR_altivec_vmrglw,
-      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw,
+      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+      {  0,  1,  2,  3, 16, 17, 18, 19,  8,  9, 10, 11, 24, 25, 26, 27 } },
+    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+      {  4,  5,  6,  7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
   };
 
   unsigned int i, j, elt, which;
@@ -27316,6 +28041,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
     {
       bool swapped;
 
+      if ((patterns[j].mask & rs6000_isa_flags) == 0)
+	continue;
+
       elt = patterns[j].perm[0];
       if (perm[0] == elt)
 	swapped = false;
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 76f3bf99250..021e72a80e3 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -715,6 +715,11 @@ extern unsigned char rs6000_recip_bits[];
    instructions for them.  Might as well be consistent with bits and bytes.  */
 #define WORDS_BIG_ENDIAN 1
 
+/* This says that for the IBM long double the larger magnitude double
+   comes first.  It's really a two element double array, and arrays
+   don't index differently between little- and big-endian.  */
+#define LONG_DOUBLE_LARGE_FIRST 1
+
 #define MAX_BITS_PER_WORD 64
 
 /* Width of a word, in units (bytes).  */
@@ -1114,10 +1119,10 @@ extern unsigned rs6000_pointer_size;
 #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
 
 /* Alternate name for any vector register supporting logical operations, no
-   matter which instruction set(s) are available.  Under VSX, we allow GPRs as
-   well as vector registers on 64-bit systems.  We don't allow 32-bit systems,
-   due to the number of registers involved, and the number of instructions to
-   load/store the values..  */
+   matter which instruction set(s) are available.  For 64-bit mode, we also
+   allow logical operations in the GPRS.  This is to allow atomic quad word
+   builtins not to need the VSX registers for lqarx/stqcx.  It also helps with
+   __int128_t arguments that are passed in GPRs.  */
 #define VLOGICAL_REGNO_P(N)						\
   (ALTIVEC_REGNO_P (N)							\
    || (TARGET_VSX && FP_REGNO_P (N))					\
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 013a0e38551..7fe23bff351 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -127,6 +127,13 @@
    UNSPEC_LFIWZX
    UNSPEC_FCTIWUZ
    UNSPEC_GRP_END_NOP
+   UNSPEC_P8V_FMRGOW
+   UNSPEC_P8V_MTVSRWZ
+   UNSPEC_P8V_RELOAD_FROM_GPR
+   UNSPEC_P8V_MTVSRD
+   UNSPEC_P8V_XXPERMDI
+   UNSPEC_P8V_RELOAD_FROM_VSX
+   UNSPEC_FUSION_GPR
   ])
 
 ;;
@@ -166,9 +173,14 @@
 		(const_int 4)))
 
 ;; Processor type -- this attribute must exactly match the processor_type
-;; enumeration in rs6000.h.
-
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
+;; enumeration in rs6000-opts.h.
+(define_attr "cpu"
+  "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,
+   ppc750,ppc7400,ppc7450,
+   ppc403,ppc405,ppc440,ppc476,
+   ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,
+   power4,power5,power6,power7,power8,
+   rs64a,mpccore,cell,ppca2,titan"
   (const (symbol_ref "rs6000_cpu_attr")))
 
 
@@ -227,6 +239,12 @@
 ; extend modes for DImode
 (define_mode_iterator QHSI [QI HI SI])
 
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
 ; SImode or DImode, even if DImode doesn't fit in GPRs.
 (define_mode_iterator SDI [SI DI])
 
@@ -268,6 +286,15 @@
 (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
 				(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
 
+; Iterators for 128 bit types for direct move
+(define_mode_iterator FMOVE128_GPR [(TI    "TARGET_VSX_TIMODE")
+				    (V16QI "")
+				    (V8HI  "")
+				    (V4SI  "")
+				    (V4SF  "")
+				    (V2DI  "")
+				    (V2DF  "")])
+
 ; Whether a floating point move is ok, don't allow SD without hardware FP
 (define_mode_attr fmove_ok [(SF "")
 			    (DF "")
@@ -284,11 +311,16 @@
 (define_mode_attr f32_lr [(SF "f")		 (SD "wz")])
 (define_mode_attr f32_lm [(SF "m")		 (SD "Z")])
 (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %0,%y1")	 (SD "lxsiwzx %0,%y1")])
 
 ; Definitions for store from 32-bit fpr register
 (define_mode_attr f32_sr [(SF "f")		  (SD "wx")])
 (define_mode_attr f32_sm [(SF "m")		  (SD "Z")])
 (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %1,%y0")	  (SD "stxsiwzx %1,%y0")])
+
+; Definitions for 32-bit fpr direct move
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
 
 ; These modes do not fit in integer registers in 32-bit mode.
 ; but on e500v2, the gpr are 64 bit registers
@@ -368,7 +400,7 @@
 (define_insn "*zero_extend<mode>di2_internal1"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
 	(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
-  "TARGET_POWERPC64"
+  "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
   "@
    l<wd>z%U1%X1 %0,%1
    rldicl %0,%1,0,<dbits>"
@@ -434,6 +466,29 @@
 		    (const_int 0)))]
   "")
 
+(define_insn "*zero_extendsidi2_lfiwzx"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm")
+	(zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
+  "TARGET_POWERPC64 && TARGET_LFIWZX"
+  "@
+   lwz%U1%X1 %0,%1
+   rldicl %0,%1,0,32
+   mtvsrwz %x0,%1
+   lfiwzx %0,%y1
+   lxsiwzx %x0,%y1"
+  [(set_attr_alternative "type"
+      [(if_then_else
+	 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+	 (const_string "load_ux")
+	 (if_then_else
+	   (match_test "update_address_mem (operands[1], VOIDmode)")
+	   (const_string "load_u")
+	   (const_string "load")))
+       (const_string "*")
+       (const_string "mffgpr")
+       (const_string "fpload")
+       (const_string "fpload")])])
+
 (define_insn "extendqidi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
 	(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -581,10 +636,33 @@
   "TARGET_POWERPC64"
   "")
 
-(define_insn ""
+(define_insn "*extendsidi2_lfiwax"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm")
+	(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+  "TARGET_POWERPC64 && TARGET_LFIWAX"
+  "@
+   lwa%U1%X1 %0,%1
+   extsw %0,%1
+   mtvsrwa %x0,%1
+   lfiwax %0,%y1
+   lxsiwax %x0,%y1"
+  [(set_attr_alternative "type"
+      [(if_then_else
+	 (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+	 (const_string "load_ext_ux")
+	 (if_then_else
+	   (match_test "update_address_mem (operands[1], VOIDmode)")
+	   (const_string "load_ext_u")
+	   (const_string "load_ext")))
+       (const_string "exts")
+       (const_string "mffgpr")
+       (const_string "fpload")
+       (const_string "fpload")])])
+
+(define_insn "*extendsidi2_nocell"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
 	(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
-  "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+  "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
   "@
    lwa%U1%X1 %0,%1
    extsw %0,%1"
@@ -598,7 +676,7 @@
 	   (const_string "load_ext")))
        (const_string "exts")])])
 
-(define_insn ""
+(define_insn "*extendsidi2_nocell"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
 	(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
   "TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
@@ -2035,7 +2113,9 @@
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
   "TARGET_CMPB && TARGET_POPCNTB"
-  "prty<wd> %0,%1")
+  "prty<wd> %0,%1"
+  [(set_attr "length" "4")
+   (set_attr "type" "popcnt")])
 
 (define_expand "parity<mode>2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "")
@@ -4316,7 +4396,7 @@
    #
    #
    #"
-  [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+  [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
    (set_attr "length" "4,4,4,8,8,8")])
 
 (define_split
@@ -4348,7 +4428,7 @@
    #
    #
    #"
-  [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+  [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
    (set_attr "length" "4,4,4,8,8,8")])
 
 (define_split
@@ -5104,6 +5184,41 @@
   "frsqrtes %0,%1"
   [(set_attr "type" "fp")])
 
+;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
+;; builtins.c and optabs.c that are not correct for IBM long double
+;; when little-endian.
+(define_expand "signbittf2"
+  [(set (match_dup 2)
+	(float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+   (set (match_dup 3)
+   	(subreg:DI (match_dup 2) 0))
+   (set (match_dup 4)
+   	(match_dup 5))
+   (set (match_operand:SI 0 "gpc_reg_operand" "")
+  	(match_dup 6))]
+  "!TARGET_IEEEQUAD
+   && TARGET_HARD_FLOAT
+   && (TARGET_FPRS || TARGET_E500_DOUBLE)
+   && TARGET_LONG_DOUBLE_128"
+{
+  operands[2] = gen_reg_rtx (DFmode);
+  operands[3] = gen_reg_rtx (DImode);
+  if (TARGET_POWERPC64)
+    {
+      operands[4] = gen_reg_rtx (DImode);
+      operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63));
+      operands[6] = gen_rtx_SUBREG (SImode, operands[4],
+				    WORDS_BIG_ENDIAN ? 4 : 0);
+    }
+  else
+    {
+      operands[4] = gen_reg_rtx (SImode);
+      operands[5] = gen_rtx_SUBREG (SImode, operands[3],
+				    WORDS_BIG_ENDIAN ? 0 : 4);
+      operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31));
+    }
+})
+
 (define_expand "copysign<mode>3"
   [(set (match_dup 3)
         (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
@@ -5553,12 +5668,15 @@
 ; We don't define lfiwax/lfiwzx with the normal definition, because we
 ; don't want to support putting SImode in FPR registers.
 (define_insn "lfiwax"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
-	(unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+	(unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
 		   UNSPEC_LFIWAX))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
-  "lfiwax %0,%y1"
-  [(set_attr "type" "fpload")])
+  "@
+   lfiwax %0,%y1
+   lxsiwax %x0,%y1
+   mtvsrwa %x0,%1"
+  [(set_attr "type" "fpload,fpload,mffgpr")])
 
 ; This split must be run before register allocation because it allocates the
 ; memory slot that is needed to move values to/from the FPR.  We don't allocate
@@ -5580,7 +5698,8 @@
   rtx src = operands[1];
   rtx tmp;
 
-  if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+  if (!MEM_P (src) && TARGET_POWERPC64
+      && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
     tmp = convert_to_mode (DImode, src, false);
   else
     {
@@ -5629,12 +5748,15 @@
    (set_attr "type" "fpload")])
 
 (define_insn "lfiwzx"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
-	(unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+	(unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
 		   UNSPEC_LFIWZX))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
-  "lfiwzx %0,%y1"
-  [(set_attr "type" "fpload")])
+  "@
+   lfiwzx %0,%y1
+   lxsiwzx %x0,%y1
+   mtvsrwz %x0,%1"
+  [(set_attr "type" "fpload,fpload,mftgpr")])
 
 (define_insn_and_split "floatunssi<mode>2_lfiwzx"
   [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
@@ -5651,7 +5773,8 @@
   rtx src = operands[1];
   rtx tmp;
 
-  if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+  if (!MEM_P (src) && TARGET_POWERPC64
+      && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
     tmp = convert_to_mode (DImode, src, true);
   else
     {
@@ -5942,7 +6065,7 @@
       emit_insn (gen_stfiwx (dest, tmp));
       DONE;
     }
-  else if (TARGET_MFPGPR && TARGET_POWERPC64)
+  else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
     {
       dest = gen_lowpart (DImode, dest);
       emit_move_insn (dest, tmp);
@@ -6036,7 +6159,7 @@
       emit_insn (gen_stfiwx (dest, tmp));
       DONE;
     }
-  else if (TARGET_MFPGPR && TARGET_POWERPC64)
+  else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
     {
       dest = gen_lowpart (DImode, dest);
       emit_move_insn (dest, tmp);
@@ -8285,6 +8408,18 @@
 	(compare:CC (match_dup 0)
 		    (const_int 0)))]
   "")
+
+;; Eqv operation.
+(define_insn "*eqv<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+	(not:GPR
+	 (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+		  (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
+  ""
+  "eqv %0,%1,%2"
+  [(set_attr "type" "integer")
+   (set_attr "length" "4")])
+
 
 ;; Now define ways of moving data around.
 
@@ -8490,7 +8625,7 @@
    cmp<wd>i %2,%0,0
    mr. %0,%1
    #"
-  [(set_attr "type" "cmp,compare,cmp")
+  [(set_attr "type" "cmp,fast_compare,cmp")
    (set_attr "length" "4,4,8")])
 
 (define_split
@@ -8680,8 +8815,8 @@
 }")
 
 (define_insn "mov<mode>_hardfloat"
-  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
-	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -8694,6 +8829,10 @@
    xxlxor %x0,%x0,%x0
    <f32_li>
    <f32_si>
+   <f32_lv>
+   <f32_sv>
+   mtvsrwz %x0,%1
+   mfvsrwz %0,%x1
    mt%0 %1
    mf%1 %0
    nop
@@ -8732,16 +8871,20 @@
 	   (match_test "update_address_mem (operands[0], VOIDmode)")
 	   (const_string "fpstore_u")
 	   (const_string "fpstore")))
+       (const_string "fpload")
+       (const_string "fpstore")
+       (const_string "mftgpr")
+       (const_string "mffgpr")
        (const_string "mtjmpr")
        (const_string "mfjmpr")
        (const_string "*")
        (const_string "*")
        (const_string "*")])
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
 
 (define_insn "*mov<mode>_softfloat"
   [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
-	(match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+	(match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
@@ -8954,8 +9097,8 @@
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
 (define_insn "*mov<mode>_hardfloat64"
-  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg")
-	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))]
+  [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+	(match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8980,7 +9123,9 @@
    #
    #
    mftgpr %0,%1
-   mffgpr %0,%1"
+   mffgpr %0,%1
+   mfvsrd %0,%x1
+   mtvsrd %x0,%1"
   [(set_attr_alternative "type"
       [(if_then_else
 	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9038,8 +9183,10 @@
        (const_string "*")
        (const_string "*")
        (const_string "mftgpr")
+       (const_string "mffgpr")
+       (const_string "mftgpr")
        (const_string "mffgpr")])
-   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
 
 (define_insn "*mov<mode>_softfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
@@ -9154,8 +9301,8 @@
   "&& reload_completed"
   [(pc)]
 {
-  const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
-  const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+  const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+  const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
   emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word),
 		  operands[1]);
   emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word),
@@ -9384,8 +9531,8 @@
    && TARGET_LONG_DOUBLE_128"
   "
 {
-  const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
-  const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+  const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+  const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
   operands[3] = gen_reg_rtx (DFmode);
   operands[4] = gen_reg_rtx (CCFPmode);
   operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -9419,6 +9566,216 @@
 })
 
 
+;; Power8 merge instructions to allow direct move to/from floating point
+;; registers in 32-bit mode.  We use TF mode to get two registers to move the
+;; individual 32-bit parts across.  Subreg doesn't work too well on the TF
+;; value, since it is allocated in reload and not all of the flow information
+;; is setup for it.  We have two patterns to do the two moves between gprs and
+;; fprs.  There isn't a dependancy between the two, but we could potentially
+;; schedule other instructions between the two instructions.  TFmode is
+;; currently limited to traditional FPR registers.  If/when this is changed, we
+;; will need to revist %L to make sure it works with VSX registers, or add an
+;; %x version of %L.
+
+(define_insn "p8_fmrgow_<mode>"
+  [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
+	(unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
+			 UNSPEC_P8V_FMRGOW))]
+  "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "fmrgow %0,%1,%L1"
+  [(set_attr "type" "vecperm")])
+
+(define_insn "p8_mtvsrwz_1"
+  [(set (match_operand:TF 0 "register_operand" "=d")
+	(unspec:TF [(match_operand:SI 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRWZ))]
+  "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrwz %x0,%1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrwz_2"
+  [(set (match_operand:TF 0 "register_operand" "+d")
+	(unspec:TF [(match_dup 0)
+		    (match_operand:SI 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRWZ))]
+  "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrwz %L0,%1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
+  [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
+	(unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
+			 UNSPEC_P8V_RELOAD_FROM_GPR))
+   (clobber (match_operand:TF 2 "register_operand" "=d"))]
+  "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx gpr_hi_reg = gen_highpart (SImode, src);
+  rtx gpr_lo_reg = gen_lowpart (SImode, src);
+
+  emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
+  emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
+  emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
+  DONE;
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move 128 bit values from GPRs to VSX registers in 64-bit mode
+(define_insn "p8_mtvsrd_1"
+  [(set (match_operand:TF 0 "register_operand" "=ws")
+	(unspec:TF [(match_operand:DI 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRD))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrd %0,%1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_2"
+  [(set (match_operand:TF 0 "register_operand" "+ws")
+	(unspec:TF [(match_dup 0)
+		    (match_operand:DI 1 "register_operand" "r")]
+		   UNSPEC_P8V_MTVSRD))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrd %L0,%1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_xxpermdi_<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+	(unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+			     UNSPEC_P8V_XXPERMDI))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "xxpermdi %x0,%1,%L1,0"
+  [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+	(unspec:FMOVE128_GPR
+	 [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+	 UNSPEC_P8V_RELOAD_FROM_GPR))
+   (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx gpr_hi_reg = gen_highpart (DImode, src);
+  rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+  emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+  emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+  emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move SFmode to a VSX from a GPR register.  Because scalar floating point
+;; type is stored internally as double precision in the VSX registers, we have
+;; to convert it from the vector format.
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+  [(set (match_operand:SF 0 "register_operand" "=wa")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+		   UNSPEC_P8V_RELOAD_FROM_GPR))
+   (clobber (match_operand:DI 2 "register_operand" "=r"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+  rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  emit_move_insn (op0_di, op2);
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "two")])
+
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
+;; and then doing a move of that.
+(define_insn "p8_mfvsrd_3_<mode>"
+  [(set (match_operand:DF 0 "register_operand" "=r")
+	(unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+		   UNSPEC_P8V_RELOAD_FROM_VSX))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "mfvsrd %0,%x1"
+  [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+  [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+	(unspec:FMOVE128_GPR
+	 [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+	 UNSPEC_P8V_RELOAD_FROM_VSX))
+   (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+  rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+  emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+  emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+  emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register.  Because scalar floating point
+;; type is stored internally as double precision, we have to convert it to the
+;; vector format.
+
+(define_insn_and_split "reload_gpr_from_vsxsf"
+  [(set (match_operand:SF 0 "register_operand" "=r")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
+		   UNSPEC_P8V_RELOAD_FROM_VSX))
+   (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
+
+  emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
+  emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
+  emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
+  DONE;
+}
+  [(set_attr "length" "12")
+   (set_attr "type" "three")])
+
+(define_insn "p8_mfvsrd_4_disf"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
+		   UNSPEC_P8V_RELOAD_FROM_VSX))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+  "mfvsrd %0,%x1"
+  [(set_attr "type" "mftgpr")])
+
+
 ;; Next come the multi-word integer load and store and the load and store
 ;; multiple insns.
 
@@ -9467,7 +9824,8 @@
   [(set (match_operand:DI 0 "gpc_reg_operand" "")
 	(match_operand:DI 1 "const_int_operand" ""))]
   "! TARGET_POWERPC64 && reload_completed
-   && gpr_or_gpr_p (operands[0], operands[1])"
+   && gpr_or_gpr_p (operands[0], operands[1])
+   && !direct_move_p (operands[0], operands[1])"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 1))]
   "
@@ -9485,13 +9843,14 @@
   [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
         (match_operand:DIFD 1 "input_operand" ""))]
   "reload_completed && !TARGET_POWERPC64
-   && gpr_or_gpr_p (operands[0], operands[1])"
+   && gpr_or_gpr_p (operands[0], operands[1])
+   && !direct_move_p (operands[0], operands[1])"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
 (define_insn "*movdi_internal64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg")
-	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
+	(match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
   "TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], DImode)
        || gpc_reg_operand (operands[1], DImode))"
@@ -9513,7 +9872,9 @@
    nop
    xxlxor %x0,%x0,%x0
    mftgpr %0,%1
-   mffgpr %0,%1"
+   mffgpr %0,%1
+   mfvsrd %0,%x1
+   mtvsrd %x0,%1"
   [(set_attr_alternative "type"
       [(if_then_else
 	 (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9562,8 +9923,10 @@
        (const_string "*")
        (const_string "vecsimple")
        (const_string "mftgpr")
+       (const_string "mffgpr")
+       (const_string "mftgpr")
        (const_string "mffgpr")])
-   (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")])
+   (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")])
 
 ;; Generate all one-bits and clear left or right.
 ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber.
@@ -9652,19 +10015,23 @@
 					  (const_string "conditional")))])
 
 (define_insn "*mov<mode>_ppc64"
-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r")
-	(match_operand:TI2 1 "input_operand" "r,Y,r"))]
-  "(TARGET_POWERPC64
-   && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
+	(match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode)))"
-  "#"
-  [(set_attr "type" "store,load,*")])
+{
+  return rs6000_output_move_128bit (operands);
+}
+  [(set_attr "type" "store,load,*,*")
+   (set_attr "length" "8")])
 
 (define_split
-  [(set (match_operand:TI2 0 "gpc_reg_operand" "")
+  [(set (match_operand:TI2 0 "int_reg_operand" "")
 	(match_operand:TI2 1 "const_double_operand" ""))]
-  "TARGET_POWERPC64"
+  "TARGET_POWERPC64
+   && (VECTOR_MEM_NONE_P (<MODE>mode)
+       || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
   [(set (match_dup 2) (match_dup 4))
    (set (match_dup 3) (match_dup 5))]
   "
@@ -9691,7 +10058,9 @@
   [(set (match_operand:TI2 0 "nonimmediate_operand" "")
         (match_operand:TI2 1 "input_operand" ""))]
   "reload_completed
-   && gpr_or_gpr_p (operands[0], operands[1])"
+   && gpr_or_gpr_p (operands[0], operands[1])
+   && !direct_move_p (operands[0], operands[1])
+   && !quad_load_store_p (operands[0], operands[1])"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
 
@@ -12554,8 +12923,8 @@
    (match_dup 13)]
 {
   REAL_VALUE_TYPE rv;
-  const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
-  const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+  const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+  const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
 
   operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word);
   operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word);
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md
index cec2b430b82..bf10a5dc180 100644
--- a/gcc/config/rs6000/spe.md
+++ b/gcc/config/rs6000/spe.md
@@ -2604,8 +2604,8 @@
    && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
   "
 {
-  const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
-  const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+  const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+  const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
   operands[3] = gen_reg_rtx (DFmode);
   operands[4] = gen_reg_rtx (CCFPmode);
   operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -2627,8 +2627,8 @@
    && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
   "
 {
-  const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
-  const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+  const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+  const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
   operands[3] = gen_reg_rtx (DFmode);
   operands[4] = gen_reg_rtx (CCFPmode);
   operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 252e2690a98..8616b3eca5f 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -18,14 +18,23 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+			(HI "lharx")
+			(SI "lwarx")
+			(DI "ldarx")
+			(TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+			(HI "sthcx.")
+			(SI "stwcx.")
+			(DI "stdcx.")
+			(TI "stqcx.")])
 
 (define_code_iterator FETCHOP [plus minus ior xor and])
 (define_code_attr fetchop_name
   [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
 (define_code_attr fetchop_pred
-  [(plus "add_operand") (minus "gpc_reg_operand")
+  [(plus "add_operand") (minus "int_reg_operand")
    (ior "logical_operand") (xor "logical_operand") (and "and_operand")])
 
 (define_expand "mem_thread_fence"
@@ -129,16 +138,7 @@
     case MEMMODEL_CONSUME:
     case MEMMODEL_ACQUIRE:
     case MEMMODEL_SEQ_CST:
-      if (GET_MODE (operands[0]) == QImode)
-	emit_insn (gen_loadsync_qi (operands[0]));
-      else if (GET_MODE (operands[0]) == HImode)
-	emit_insn (gen_loadsync_hi (operands[0]));
-      else if (GET_MODE (operands[0]) == SImode)
-	emit_insn (gen_loadsync_si (operands[0]));
-      else if (GET_MODE (operands[0]) == DImode)
-	emit_insn (gen_loadsync_di (operands[0]));
-      else
-	gcc_unreachable ();
+      emit_insn (gen_loadsync_<mode> (operands[0]));
       break;
     default:
       gcc_unreachable ();
@@ -170,35 +170,109 @@
   DONE;
 })
 
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in".  Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instrucitons
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8.  TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+			      (HI "TARGET_SYNC_HI_QI")
+			      SI
+			      (DI "TARGET_POWERPC64")])
+
+;; Types that we should provide atomic instructions for.
 
-(define_mode_iterator ATOMIC    [SI (DI "TARGET_POWERPC64")])
+(define_mode_iterator AINT [QI
+			    HI
+			    SI
+			    (DI "TARGET_POWERPC64")
+			    (TI "TARGET_SYNC_TI")])
 
 (define_insn "load_locked<mode>"
-  [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+  [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
 	(unspec_volatile:ATOMIC
          [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
   ""
   "<larx> %0,%y1"
   [(set_attr "type" "load_l")])
 
+(define_insn "load_locked<QHI:mode>_si"
+  [(set (match_operand:SI 0 "int_reg_operand" "=r")
+	(unspec_volatile:SI
+	  [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+  "TARGET_SYNC_HI_QI"
+  "<QHI:larx> %0,%y1"
+  [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs
+(define_expand "load_lockedti"
+  [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+   (use (match_operand:TI 1 "memory_operand" ""))]
+  "TARGET_SYNC_TI"
+{
+  /* Use a temporary register to force getting an even register for the
+     lqarx/stqcrx. instructions.  Normal optimizations will eliminate this
+     extra copy.  */
+  rtx pti = gen_reg_rtx (PTImode);
+  emit_insn (gen_load_lockedpti (pti, operands[1]));
+  emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+  DONE;
+})
+
+(define_insn "load_lockedpti"
+  [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+	(unspec_volatile:PTI
+         [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+  "TARGET_SYNC_TI
+   && !reg_mentioned_p (operands[0], operands[1])
+   && quad_int_reg_operand (operands[0], PTImode)"
+  "lqarx %0,%y1"
+  [(set_attr "type" "load_l")])
+
 (define_insn "store_conditional<mode>"
   [(set (match_operand:CC 0 "cc_reg_operand" "=x")
 	(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
    (set (match_operand:ATOMIC 1 "memory_operand" "=Z")
-	(match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+	(match_operand:ATOMIC 2 "int_reg_operand" "r"))]
   ""
   "<stcx> %2,%y1"
   [(set_attr "type" "store_c")])
 
+(define_expand "store_conditionalti"
+  [(use (match_operand:CC 0 "cc_reg_operand" ""))
+   (use (match_operand:TI 1 "memory_operand" ""))
+   (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+  "TARGET_SYNC_TI"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
+  rtx pti_op2 = gen_reg_rtx (PTImode);
+
+  /* Use a temporary register to force getting an even register for the
+     lqarx/stqcrx. instructions.  Normal optimizations will eliminate this
+     extra copy.  */
+  emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+  emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+  DONE;
+})
+
+(define_insn "store_conditionalpti"
+  [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+	(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+   (set (match_operand:PTI 1 "memory_operand" "=Z")
+	(match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+  "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+  "stqcx. %2,%y1"
+  [(set_attr "type" "store_c")])
+
 (define_expand "atomic_compare_and_swap<mode>"
-  [(match_operand:SI 0 "gpc_reg_operand" "")		;; bool out
-   (match_operand:INT1 1 "gpc_reg_operand" "")		;; val out
-   (match_operand:INT1 2 "memory_operand" "")		;; memory
-   (match_operand:INT1 3 "reg_or_short_operand" "")	;; expected
-   (match_operand:INT1 4 "gpc_reg_operand" "")		;; desired
+  [(match_operand:SI 0 "int_reg_operand" "")		;; bool out
+   (match_operand:AINT 1 "int_reg_operand" "")		;; val out
+   (match_operand:AINT 2 "memory_operand" "")		;; memory
+   (match_operand:AINT 3 "reg_or_short_operand" "")	;; expected
+   (match_operand:AINT 4 "int_reg_operand" "")		;; desired
    (match_operand:SI 5 "const_int_operand" "")		;; is_weak
    (match_operand:SI 6 "const_int_operand" "")		;; model succ
    (match_operand:SI 7 "const_int_operand" "")]		;; model fail
@@ -209,9 +283,9 @@
 })
 
 (define_expand "atomic_exchange<mode>"
-  [(match_operand:INT1 0 "gpc_reg_operand" "")		;; output
-   (match_operand:INT1 1 "memory_operand" "")		;; memory
-   (match_operand:INT1 2 "gpc_reg_operand" "")		;; input
+  [(match_operand:AINT 0 "int_reg_operand" "")		;; output
+   (match_operand:AINT 1 "memory_operand" "")		;; memory
+   (match_operand:AINT 2 "int_reg_operand" "")		;; input
    (match_operand:SI 3 "const_int_operand" "")]		;; model
   ""
 {
@@ -220,9 +294,9 @@
 })
 
 (define_expand "atomic_<fetchop_name><mode>"
-  [(match_operand:INT1 0 "memory_operand" "")		;; memory
-   (FETCHOP:INT1 (match_dup 0)
-     (match_operand:INT1 1 "<fetchop_pred>" ""))	;; operand
+  [(match_operand:AINT 0 "memory_operand" "")		;; memory
+   (FETCHOP:AINT (match_dup 0)
+     (match_operand:AINT 1 "<fetchop_pred>" ""))	;; operand
    (match_operand:SI 2 "const_int_operand" "")]		;; model
   ""
 {
@@ -232,8 +306,8 @@
 })
 
 (define_expand "atomic_nand<mode>"
-  [(match_operand:INT1 0 "memory_operand" "")		;; memory
-   (match_operand:INT1 1 "gpc_reg_operand" "")		;; operand
+  [(match_operand:AINT 0 "memory_operand" "")		;; memory
+   (match_operand:AINT 1 "int_reg_operand" "")		;; operand
    (match_operand:SI 2 "const_int_operand" "")]		;; model
   ""
 {
@@ -243,10 +317,10 @@
 })
 
 (define_expand "atomic_fetch_<fetchop_name><mode>"
-  [(match_operand:INT1 0 "gpc_reg_operand" "")		;; output
-   (match_operand:INT1 1 "memory_operand" "")		;; memory
-   (FETCHOP:INT1 (match_dup 1)
-     (match_operand:INT1 2 "<fetchop_pred>" ""))	;; operand
+  [(match_operand:AINT 0 "int_reg_operand" "")		;; output
+   (match_operand:AINT 1 "memory_operand" "")		;; memory
+   (FETCHOP:AINT (match_dup 1)
+     (match_operand:AINT 2 "<fetchop_pred>" ""))	;; operand
    (match_operand:SI 3 "const_int_operand" "")]		;; model
   ""
 { 
@@ -256,9 +330,9 @@
 })
 
 (define_expand "atomic_fetch_nand<mode>"
-  [(match_operand:INT1 0 "gpc_reg_operand" "")		;; output
-   (match_operand:INT1 1 "memory_operand" "")		;; memory
-   (match_operand:INT1 2 "gpc_reg_operand" "")		;; operand
+  [(match_operand:AINT 0 "int_reg_operand" "")		;; output
+   (match_operand:AINT 1 "memory_operand" "")		;; memory
+   (match_operand:AINT 2 "int_reg_operand" "")		;; operand
    (match_operand:SI 3 "const_int_operand" "")]		;; model
   ""
 {
@@ -268,10 +342,10 @@
 })
 
 (define_expand "atomic_<fetchop_name>_fetch<mode>"
-  [(match_operand:INT1 0 "gpc_reg_operand" "")		;; output
-   (match_operand:INT1 1 "memory_operand" "")		;; memory
-   (FETCHOP:INT1 (match_dup 1)
-     (match_operand:INT1 2 "<fetchop_pred>" ""))	;; operand
+  [(match_operand:AINT 0 "int_reg_operand" "")		;; output
+   (match_operand:AINT 1 "memory_operand" "")		;; memory
+   (FETCHOP:AINT (match_dup 1)
+     (match_operand:AINT 2 "<fetchop_pred>" ""))	;; operand
    (match_operand:SI 3 "const_int_operand" "")]		;; model
   ""
 {
@@ -281,9 +355,9 @@
 })
 
 (define_expand "atomic_nand_fetch<mode>"
-  [(match_operand:INT1 0 "gpc_reg_operand" "")		;; output
-   (match_operand:INT1 1 "memory_operand" "")		;; memory
-   (match_operand:INT1 2 "gpc_reg_operand" "")		;; operand
+  [(match_operand:AINT 0 "int_reg_operand" "")		;; output
+   (match_operand:AINT 1 "memory_operand" "")		;; memory
+   (match_operand:AINT 2 "int_reg_operand" "")		;; operand
    (match_operand:SI 3 "const_int_operand" "")]		;; model
   ""
 {
diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux
index 017a293cde3..62a5b941389 100644
--- a/gcc/config/rs6000/t-linux
+++ b/gcc/config/rs6000/t-linux
@@ -2,7 +2,7 @@
 # or soft-float.
 ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float)))
 ifneq (,$(findstring spe,$(target)))
-MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1)
+MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1)
 else
 MULTIARCH_DIRNAME = powerpc-linux-gnu
 endif
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 8b8b3427454..6cfebdeebdc 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -126,7 +126,9 @@
         (match_operand:VEC_L 1 "input_operand" ""))]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
    && reload_completed
-   && gpr_or_gpr_p (operands[0], operands[1])"
+   && gpr_or_gpr_p (operands[0], operands[1])
+   && !direct_move_p (operands[0], operands[1])
+   && !quad_load_store_p (operands[0], operands[1])"
   [(pc)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -730,9 +732,10 @@
   "")
 
 (define_expand "and<mode>3"
-  [(set (match_operand:VEC_L 0 "vlogical_operand" "")
-        (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
-		   (match_operand:VEC_L 2 "vlogical_operand" "")))]
+  [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+		   (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+			      (match_operand:VEC_L 2 "vlogical_operand" "")))
+	      (clobber (match_scratch:CC 3 ""))])]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
    && (<MODE>mode != TImode || TARGET_POWERPC64)"
   "")
@@ -746,8 +749,8 @@
 
 (define_expand "nor<mode>3"
   [(set (match_operand:VEC_L 0 "vlogical_operand" "")
-        (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
-			      (match_operand:VEC_L 2 "vlogical_operand" ""))))]
+	(and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" ""))
+		   (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
    && (<MODE>mode != TImode || TARGET_POWERPC64)"
   "")
@@ -760,6 +763,47 @@
    && (<MODE>mode != TImode || TARGET_POWERPC64)"
   "")
 
+;; Power8 vector logical instructions.
+(define_expand "eqv<mode>3"
+  [(set (match_operand:VEC_L 0 "register_operand" "")
+	(not:VEC_L
+	 (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "")
+		    (match_operand:VEC_L 2 "register_operand" ""))))]
+  "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+   && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Rewrite nand into canonical form
+(define_expand "nand<mode>3"
+  [(set (match_operand:VEC_L 0 "register_operand" "")
+	(ior:VEC_L
+	 (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+	 (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))]
+  "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+   && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; The canonical form is to have the negated elment first, so we need to
+;; reverse arguments.
+(define_expand "orc<mode>3"
+  [(set (match_operand:VEC_L 0 "register_operand" "")
+	(ior:VEC_L
+	 (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+	 (match_operand:VEC_L 2 "register_operand" "")))]
+  "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+   && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Vector count leading zeros
+(define_expand "clz<mode>2"
+  [(set (match_operand:VEC_I 0 "register_operand" "")
+	(clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+  "TARGET_P8_VECTOR")
+
+;; Vector population count
+(define_expand "popcount<mode>2"
+  [(set (match_operand:VEC_I 0 "register_operand" "")
+        (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+  "TARGET_P8_VECTOR")
+
+
 ;; Same size conversions
 (define_expand "float<VEC_int><mode>2"
   [(set (match_operand:VEC_F 0 "vfloat_operand" "")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4adf6e5ac55..b87da826a95 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -36,6 +36,10 @@
 ;; Iterator for logical types supported by VSX
 (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
 
+;; Like VSX_L, but don't support TImode for doing logical instructions in
+;; 32-bit
+(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF])
+
 ;; Iterator for memory move.  Handle TImode specially to allow
 ;; it to use gprs as well as vsx registers.
 (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -191,6 +195,8 @@
    UNSPEC_VSX_CVDPSXWS
    UNSPEC_VSX_CVDPUXWS
    UNSPEC_VSX_CVSPDP
+   UNSPEC_VSX_CVSPDPN
+   UNSPEC_VSX_CVDPSPN
    UNSPEC_VSX_CVSXWDP
    UNSPEC_VSX_CVUXWDP
    UNSPEC_VSX_CVSXDSP
@@ -207,112 +213,31 @@
 
 ;; VSX moves
 (define_insn "*vsx_mov<mode>"
-  [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
-	(match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
+  [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+	(match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
   "VECTOR_MEM_VSX_P (<MODE>mode)
    && (register_operand (operands[0], <MODE>mode) 
        || register_operand (operands[1], <MODE>mode))"
 {
-  switch (which_alternative)
-    {
-    case 0:
-    case 3:
-      gcc_assert (MEM_P (operands[0])
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
-      return "stx<VSm>x %x1,%y0";
-
-    case 1:
-    case 4:
-      gcc_assert (MEM_P (operands[1])
-		  && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
-		  && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
-		  && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
-      return "lx<VSm>x %x0,%y1";
-
-    case 2:
-    case 5:
-      return "xxlor %x0,%x1,%x1";
-
-    case 6:
-    case 7:
-    case 8:
-    case 11:
-      return "#";
-
-    case 9:
-    case 10:
-      return "xxlxor %x0,%x0,%x0";
-
-    case 12:
-      return output_vec_const_move (operands);
-
-    case 13:
-      gcc_assert (MEM_P (operands[0])
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
-      return "stvx %1,%y0";
-
-    case 14:
-      gcc_assert (MEM_P (operands[0])
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
-		  && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
-      return "lvx %0,%y1";
-
-    default:
-      gcc_unreachable ();
-    }
+  return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
+  [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+   (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
 
 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
 ;; use of TImode is for unions.  However for plain data movement, slightly
 ;; favor the vector loads
 (define_insn "*vsx_movti_64bit"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r")
-	(match_operand:TI 1 "input_operand"        "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+	(match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
    && (register_operand (operands[0], TImode) 
        || register_operand (operands[1], TImode))"
 {
-  switch (which_alternative)
-    {
-    case 0:
-      return "stxvd2x %x1,%y0";
-
-    case 1:
-      return "lxvd2x %x0,%y1";
-
-    case 2:
-      return "xxlor %x0,%x1,%x1";
-
-    case 3:
-      return "xxlxor %x0,%x0,%x0";
-
-    case 4:
-      return output_vec_const_move (operands);
-
-    case 5:
-      return "stvx %1,%y0";
-
-    case 6:
-      return "lvx %0,%y1";
-
-    case 7:
-    case 8:
-    case 9:
-    case 10:
-      return "#";
-
-    default:
-      gcc_unreachable ();
-    }
+  return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*")
-   (set_attr "length" "     4,      4,        4,       4,         8,       4,      4,8,8,8,8")])
+  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+   (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
 
 (define_insn "*vsx_movti_32bit"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
@@ -1003,6 +928,40 @@
   "xscvspdp %x0,%x1"
   [(set_attr "type" "fp")])
 
+;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
+(define_insn "vsx_xscvdpspn"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+		     UNSPEC_VSX_CVDPSPN))]
+  "TARGET_XSCVDPSPN"
+  "xscvdpspn %x0,%x1"
+  [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+		   UNSPEC_VSX_CVSPDPN))]
+  "TARGET_XSCVSPDPN"
+  "xscvspdpn %x0,%x1"
+  [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+		     UNSPEC_VSX_CVDPSPN))]
+  "TARGET_XSCVDPSPN"
+  "xscvdpspn %x0,%x1"
+  [(set_attr "type" "fp")])
+
+;; Used by direct move to move a SFmode value from GPR to VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+		   UNSPEC_VSX_CVSPDPN))]
+  "TARGET_XSCVSPDPN"
+  "xscvspdpn %x0,%x1"
+  [(set_attr "type" "fp")])
+
 ;; Convert from 64-bit to 32-bit types
 ;; Note, favor the Altivec registers since the usual use of these instructions
 ;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1088,70 +1047,368 @@
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 
-;; Logical operations
-;; Do not support TImode logical instructions on 32-bit at present, because the
-;; compiler will see that we have a TImode and when it wanted DImode, and
-;; convert the DImode to TImode, store it on the stack, and load it in a VSX
-;; register.
-(define_insn "*vsx_and<mode>3"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-        (and:VSX_L
-	 (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
-	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
+;; Logical operations.  Do not support TImode logical instructions on 32-bit at
+;; present, because the compiler will see that we have a TImode and when it
+;; wanted DImode, and convert the DImode to TImode, store it on the stack, and
+;; load it in a VSX register or generate extra logical instructions in GPR
+;; registers.
+
+;; When we are splitting the operations to GPRs, we use three alternatives, two
+;; where the first/second inputs and output are in the same register, and the
+;; third where the output specifies an early clobber so that we don't have to
+;; worry about overlapping registers.
+
+(define_insn "*vsx_and<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+        (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+		    (match_operand:VSX_L2 2 "vlogical_operand" "wa")))
+   (clobber (match_scratch:CC 3 "X"))]
+  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
   "xxland %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
 
-(define_insn "*vsx_ior<mode>3"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-        (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
-		   (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_and<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+        (and:VSX_L
+	 (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")
+	 (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))
+   (clobber (match_scratch:CC 3 "X,X,X,X"))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxland %x0,%x1,%x2
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6)))
+	      (clobber (match_dup 3))])
+   (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9)))
+	      (clobber (match_dup 3))])]
+{
+  operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_ior<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+        (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+		    (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
   "xxlor %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_ior<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+        (ior:VSX_L
+	 (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+	 (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlor %x0,%x1,%x2
+   #
+   #
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(const_int 0)]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
 
-(define_insn "*vsx_xor<mode>3"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-        (xor:VSX_L
-	 (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
-	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
+  if (operands[5] == constm1_rtx)
+    emit_move_insn (operands[3], constm1_rtx);
+
+  else if (operands[5] == const0_rtx)
+    {
+      if (!rtx_equal_p (operands[3], operands[4]))
+	emit_move_insn (operands[3], operands[4]);
+    }
+  else
+    emit_insn (gen_iordi3 (operands[3], operands[4], operands[5]));
+
+  if (operands[8] == constm1_rtx)
+    emit_move_insn (operands[8], constm1_rtx);
+
+  else if (operands[8] == const0_rtx)
+    {
+      if (!rtx_equal_p (operands[6], operands[7]))
+	emit_move_insn (operands[6], operands[7]);
+    }
+  else
+    emit_insn (gen_iordi3 (operands[6], operands[7], operands[8]));
+  DONE;
+}
+  [(set_attr "type" "vecsimple,two,two,two,three,three")
+   (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_xor<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+        (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+		    (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64"
   "xxlxor %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
 
-(define_insn "*vsx_one_cmpl<mode>2"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-        (not:VSX_L
-	 (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_xor<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+        (xor:VSX_L
+	 (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+	 (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlxor %x0,%x1,%x2
+   #
+   #
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5)))
+   (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two,three,three")
+   (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_one_cmpl<mode>2_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+        (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
   "xxlnor %x0,%x1,%x1"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r")
+        (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlnor %x0,%x1,%x1
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 2) (not:DI (match_dup 3)))
+   (set (match_dup 4) (not:DI (match_dup 5)))]
+{
+  operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two")
+   (set_attr "length" "4,8,8")])
   
-(define_insn "*vsx_nor<mode>3"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-        (not:VSX_L
-	 (ior:VSX_L
-	  (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
-	  (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn "*vsx_nor<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+	(and:VSX_L2
+	 (not:VSX_L2 (match_operand:VSX_L 1 "vlogical_operand" "%wa"))
+	 (not:VSX_L2 (match_operand:VSX_L 2 "vlogical_operand" "wa"))))]
+  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
   "xxlnor %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nor<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+	(and:VSX_L
+	 (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r"))
+	 (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlnor %x0,%x1,%x2
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+   (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_andc<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+        (and:VSX_L2
+	 (not:VSX_L2
+	  (match_operand:VSX_L2 2 "vlogical_operand" "wa"))
+	 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "xxlandc %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
 
-(define_insn "*vsx_andc<mode>3"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+(define_insn_and_split "*vsx_andc<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
         (and:VSX_L
 	 (not:VSX_L
-	  (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
-	 (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)
-   && (<MODE>mode != TImode || TARGET_POWERPC64)"
-  "xxlandc %x0,%x1,%x2"
-  [(set_attr "type" "vecsimple")])
+	  (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r"))
+	 (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))]
+  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlandc %x0,%x1,%x2
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5)))
+   (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
+
+;; Power8 vector logical instructions.
+(define_insn "*vsx_eqv<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+	(not:VSX_L2
+	 (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")
+		     (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+  "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "xxleqv %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_eqv<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+	(not:VSX_L
+	 (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")
+		    (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+  "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxleqv %x0,%x1,%x2
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+   && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5))))
+   (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
+
+;; Rewrite nand into canonical form
+(define_insn "*vsx_nand<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+	(ior:VSX_L2
+	 (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+	 (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+  "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "xxlnand %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nand<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r")
+	(ior:VSX_L
+	 (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r"))
+	 (not:VSX_L (match_operand:VSX_L 2 "register_operand" "wa,r,0,r"))))]
+  "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlnand %x0,%x1,%x2
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+   && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+   (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
+
+;; Rewrite or complement into canonical form, by reversing the arguments
+(define_insn "*vsx_orc<mode>3_32bit"
+  [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+	(ior:VSX_L2
+	 (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+	 (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+  "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "xxlorc %x0,%x2,%x1"
+  [(set_attr "type" "vecsimple")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_orc<mode>3_64bit"
+  [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+	(ior:VSX_L
+	 (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r"))
+	 (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))]
+  "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "@
+   xxlorc %x0,%x2,%x1
+   #
+   #
+   #"
+  "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+   && VECTOR_MEM_VSX_P (<MODE>mode)
+   && int_reg_operand (operands[0], <MODE>mode)"
+  [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5)))
+   (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+  operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+  operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+  operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+  operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+  operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+  operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+  [(set_attr "type" "vecsimple,two,two,two")
+   (set_attr "length" "4,8,8,8")])
 
 
 ;; Permute operations
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 30c34901f8d..358345a4437 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2017,14 +2017,18 @@ s390_decompose_address (rtx addr, struct s390_address *out)
 	 Thus we don't check the displacement for validity here.  If after
 	 elimination the displacement turns out to be invalid after all,
 	 this is fixed up by reload in any case.  */
-      if (base != arg_pointer_rtx
-	  && indx != arg_pointer_rtx
-	  && base != return_address_pointer_rtx
-	  && indx != return_address_pointer_rtx
-	  && base != frame_pointer_rtx
-	  && indx != frame_pointer_rtx
-	  && base != virtual_stack_vars_rtx
-	  && indx != virtual_stack_vars_rtx)
+      /* LRA maintains always displacements up to date and we need to
+	 know the displacement is right during all LRA not only at the
+	 final elimination.  */
+      if (lra_in_progress
+	  || (base != arg_pointer_rtx
+	      && indx != arg_pointer_rtx
+	      && base != return_address_pointer_rtx
+	      && indx != return_address_pointer_rtx
+	      && base != frame_pointer_rtx
+	      && indx != frame_pointer_rtx
+	      && base != virtual_stack_vars_rtx
+	      && indx != virtual_stack_vars_rtx))
 	if (!DISP_IN_RANGE (offset))
 	  return false;
     }
@@ -3189,7 +3193,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
 
   /* We need a scratch register when loading a PLUS expression which
      is not a legitimate operand of the LOAD ADDRESS instruction.  */
-  if (in_p && s390_plus_operand (x, mode))
+  /* LRA can deal with transformation of plus op very well -- so we
+     don't need to prompt LRA in this case.  */
+  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
     sri->icode = (TARGET_64BIT ?
 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
 
@@ -7868,6 +7874,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
   return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 }
 
+/* Return true if we use LRA instead of reload pass.  */
+static bool
+s390_lra_p (void)
+{
+  return s390_lra_flag;
+}
+
 /* Return true if register FROM can be eliminated via register TO.  */
 
 static bool
@@ -11105,6 +11118,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P s390_lra_p
+
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE s390_can_eliminate
 
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 43e24d5d112..b0e530f0ed4 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -221,7 +221,7 @@ enum processor_flags
 
 /* Alignment on even addresses for LARL instruction.  */
 #define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
-#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
 
 /* Alignment is not required by the hardware.  */
 #define STRICT_ALIGNMENT 0
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index cad4f5f579a..0141b9813ef 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -277,7 +277,8 @@
 (define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12"
   (const (symbol_ref "s390_tune_attr")))
 
-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12"
+(define_attr "cpu_facility"
+  "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12"
   (const_string "standard"))
 
 (define_attr "enabled" ""
@@ -304,6 +305,10 @@
 	      (match_test "TARGET_DFP"))
 	 (const_int 1)
 
+         (and (eq_attr "cpu_facility" "cpu_zarch")
+              (match_test "TARGET_CPU_ZARCH"))
+	 (const_int 1)
+
          (and (eq_attr "cpu_facility" "z10")
               (match_test "TARGET_Z10"))
 	 (const_int 1)
@@ -2690,7 +2695,7 @@
   "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
   "#"
   [(set_attr "type"         "cs")
-   (set_attr "cpu_facility" "*,*,z10,*")])
+   (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
 
 (define_split
   [(set (match_operand:BLK 0 "memory_operand" "")
@@ -2899,7 +2904,7 @@
   "(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)"
   "#"
   [(set_attr "type" "cs")
-   (set_attr "cpu_facility" "*,*,z10,*")])
+   (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
 
 (define_split
   [(set (match_operand:BLK 0 "memory_operand" "")
@@ -3075,7 +3080,7 @@
   "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
   "#"
   [(set_attr "type" "cs")
-   (set_attr "cpu_facility" "*,*,z10,*")])
+   (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
 
 (define_split
   [(set (reg:CCU CC_REGNUM)
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index b326441173c..ba38e6e14ed 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -149,3 +149,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1)
 Set the branch costs for conditional branch instructions.  Reasonable
 values are small, non-negative integers.  The default branch cost is
 1.
+
+mlra
+Target Report Var(s390_lra_flag) Init(1) Save
+Use LRA instead of reload
author	Caroline Tice <cmtice@chromium.org>	2013-06-21 12:05:49 -0700
committer	Caroline Tice <cmtice@chromium.org>	2013-06-21 12:05:49 -0700
commit	9c1705845241cdea5cf61574218edef282901fbd (patch)
tree	021da661f8a821bc7eea277521fa13f2043879a0 /gcc/config
parent	9bdedf4df4166718fa0a44811c643214c6880471 (diff)
parent	8a46ed36b0449c870d0a938c1010ad610ebcf4f7 (diff)
download	gcc-9c1705845241cdea5cf61574218edef282901fbd.tar.gz