author     Aldy Hernandez <aldyh@redhat.com>   2013-06-28 09:57:43 -0700
committer  Aldy Hernandez <aldyh@redhat.com>   2013-06-28 09:57:43 -0700
commit     7fb75753fa7e7c54af3b5e0aea65d8051feac55d (patch)
tree       568d89cbf5521cbb882c33a3a42fb332ff2e49b8 /gcc/config
parent     db2127098137dea6c246041e0d763a57a174fa3c (diff)
parent     2814409c2f46b5f71706f08358f395dddc9d8a81 (diff)
download   gcc-7fb75753fa7e7c54af3b5e0aea65d8051feac55d.tar.gz
Merge remote-tracking branch 'origin/gomp-4_0-branch' into cilk-in-gomp
Conflicts:
gcc/Makefile.in
gcc/c-family/c-common.h
gcc/c/c-parser.c
gcc/cp/Make-lang.in
gcc/cp/cp-tree.h
gcc/gimple.h
gcc/omp-low.c
gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
Diffstat (limited to 'gcc/config')
158 files changed, 9750 insertions, 2818 deletions
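For orientation before the raw hunks: the largest interface change in the aarch64 portion below replaces the old aarch64_simd_immediate_valid_for_move query (an int return code plus five separate nullable out-parameters: modconst, elementwidth, elementchar, mvn, shift) with aarch64_simd_valid_immediate, which returns bool and optionally fills a single struct simd_immediate_info { rtx value; int shift; int element_width; bool mvn; }. The following is only an illustrative sketch of that caller-side pattern, not GCC code: it stands in a plain int for rtx and invents a toy validity rule so it compiles on its own; the real struct, the new function, and its callers (aarch64_output_simd_mov_immediate, the Dn constraint, aarch64_legitimate_constant_p) appear in the hunks further down.

```c
/* Illustrative sketch only: "rtx" is stood in by int so this compiles
   outside GCC, and the validity rule is a placeholder.  The real struct
   and call sites are in the aarch64.c hunks below.  */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef int rtx;                    /* placeholder for GCC's rtx */

struct simd_immediate_info          /* mirrors the struct added in aarch64.c */
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
};

/* New-style query: one bool result, one optional info block.
   INFO may be NULL when the caller only needs a yes/no answer.  */
static bool
simd_valid_immediate (rtx op, bool inverse, struct simd_immediate_info *info)
{
  if (inverse)
    return false;                   /* stand-in validity rule */
  if (info)
    {
      info->value = op;
      info->shift = 0;
      info->element_width = 32;
      info->mvn = false;
    }
  return true;
}

int
main (void)
{
  struct simd_immediate_info info;

  /* Caller pattern used by aarch64_output_simd_mov_immediate below:
     validate once, then read every field from the single struct.  */
  bool is_valid = simd_valid_immediate (42, false, &info);
  assert (is_valid);
  printf ("element_width=%d shift=%d mvn=%d\n",
          info.element_width, info.shift, (int) info.mvn);

  /* Yes/no-only callers (e.g. the Dn constraint check) pass NULL.  */
  return simd_valid_immediate (42, false, NULL) ? 0 : 1;
}
```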
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4fdfe247a21..2a0e5fdc391 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1245,6 +1245,16 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return AARCH64_FIND_FRINT_VARIANT (sqrt); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode && out_n == C \ && in_mode == N##Fmode && in_n == C) case BUILT_IN_LFLOOR: diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 001842e43b4..e5ae556736c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -68,6 +68,13 @@ enum aarch64_symbol_context Each of of these represents a thread-local symbol, and corresponds to the thread local storage relocation operator for the symbol being referred to. + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + SYMBOL_FORCE_TO_MEM : Global variables are addressed using constant pool. All variable addresses are spilled into constant pools. The constant pools themselves are addressed using PC @@ -81,6 +88,7 @@ enum aarch64_symbol_type SYMBOL_SMALL_TLSDESC, SYMBOL_SMALL_GOTTPREL, SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, SYMBOL_FORCE_TO_MEM }; @@ -136,6 +144,8 @@ struct tune_params HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_constant_address_p (rtx); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); @@ -146,6 +156,10 @@ bool aarch64_is_long_call_p (rtx); bool aarch64_label_mentioned_p (rtx); bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); bool aarch64_pad_arg_upward (enum machine_mode, const_tree); bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); @@ -154,9 +168,9 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); bool aarch64_symbolic_address_p (rtx); -bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context, - enum aarch64_symbol_type *); bool aarch64_uimm12_shift (HOST_WIDE_INT); const char *aarch64_output_casesi (rtx *); enum aarch64_symbol_type aarch64_classify_symbol (rtx, @@ -219,6 +233,8 @@ void aarch64_split_128bit_move (rtx, rtx); bool aarch64_split_128bit_move_p (rtx, rtx); +void 
aarch64_split_simd_combine (rtx, rtx, rtx); + void aarch64_split_simd_move (rtx, rtx); /* Check for a legitimate floating point constant for FMOV. */ @@ -254,6 +270,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); - -char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index e4201732bcd..4d9b966d0ac 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -49,6 +49,7 @@ BUILTIN_VDQF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) + VAR1 (UNOP, clz, 2, v4si) BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) @@ -63,7 +64,7 @@ BUILTIN_VQ (REINTERP, reinterpretv2df, 0) BUILTIN_VDQ_I (BINOP, dup_lane, 0) - BUILTIN_SDQ_I (BINOP, dup_lane, 0) + BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0) /* Implemented by aarch64_<sur>q<r>shl<mode>. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0) BUILTIN_VSDQ_I (BINOP, uqshl, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 9069a73c46c..08826b5dd9f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -357,29 +357,18 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_dup_lane<mode>" - [(set (match_operand:ALLX 0 "register_operand" "=w") +(define_insn "aarch64_dup_lane_scalar<mode>" + [(set (match_operand:<VEL> 0 "register_operand" "=w, r") (vec_select:<VEL> - (match_operand:<VCON> 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + (match_operand:VDQ 1 "register_operand" "w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i")]) ))] "TARGET_SIMD" - "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "<MODE>")] -) - -(define_insn "aarch64_dup_lanedi" - [(set (match_operand:DI 0 "register_operand" "=w,r") - (vec_select:DI - (match_operand:V2DI 1 "register_operand" "w,w") - (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] - "TARGET_SIMD" "@ - dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2] - umov\t%0, %1.d[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "DI")] + dup\\t%<Vetype>0, %1.<Vetype>[%2] + umov\\t%<vw>0, %1.<Vetype>[%2]" + [(set_attr "simd_type" "simd_dup, simd_movgp") + (set_attr "simd_mode" "<MODE>")] ) (define_insn "aarch64_simd_dup<mode>" @@ -409,7 +398,7 @@ case 4: return "ins\t%0.d[0], %1"; case 5: return "mov\t%0, %1"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 64); default: gcc_unreachable (); } @@ -440,7 +429,7 @@ case 5: return "#"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128); + return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); default: gcc_unreachable (); } @@ -1058,9 +1047,9 @@ (vec_duplicate:<VHALF> (const_int 0))))] "TARGET_SIMD" "@ - mov\\t%d0, %d1 - fmov\t%d0, %1 - dup\t%d0, %1" + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" [(set_attr "v8type" "*,fmov,*") (set_attr "simd_type" "simd_dup,*,simd_dup") (set_attr "simd_mode" "<MODE>") @@ -1190,6 +1179,104 @@ ;; Widening arithmetic. 
+(define_insn "*aarch64_<su>mlal_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_lo<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl_hi<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlal<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand:<VWIDE> 3 "register_operand" "0")))] + "TARGET_SIMD" + "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "*aarch64_<su>mlsl<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND:<VWIDE> + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "<MODE>")] +) + (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> @@ -1611,6 +1698,15 @@ DONE; }) +(define_insn "clz<mode>2" + [(set (match_operand:VDQ_BHSI 0 
"register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "simd_type" "simd_cls") + (set_attr "simd_mode" "<MODE>")] +) + ;; 'across lanes' max and min ops. (define_insn "reduc_<maxmin_uns>_<mode>" @@ -2209,15 +2305,29 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_insn "aarch64_combine<mode>" +(define_insn_and_split "aarch64_combine<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=&w") (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] "TARGET_SIMD" - "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]" - [(set_attr "simd_type" "simd_ins") - (set_attr "simd_mode" "<MODE>")] -) + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "aarch64_simd_combine<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); + DONE; + }) ;; <su><addsub>l<q>. @@ -3280,7 +3390,8 @@ (COMPARISONS:DI (match_operand:DI 1 "register_operand" "w,w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> @@ -3291,15 +3402,7 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (COMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); @@ -3332,7 +3435,8 @@ (UCOMPARISONS:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> @@ -3342,17 +3446,9 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (UCOMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { - enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); + enum machine_mode mode = CCmode; rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); @@ -3385,7 +3481,8 @@ (and:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "register_operand" "w,r")) - (const_int 0))))] + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cmtst\t%d0, %d1, %d2 @@ -3395,16 +3492,7 @@ happening in the 'w' constraint cases. 
*/ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (and:DI (match_dup 1) - (match_dup 2)) - (const_int 0))) - (set (match_dup 0) - (neg:DI - (ne:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a0aff58668b..072f5401279 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1,4 +1,5 @@ -/* Machine description for AArch64 architecture. + +1;3201;0c/* Machine description for AArch64 architecture. Copyright (C) 2009-2013 Free Software Foundation, Inc. Contributed by ARM Ltd. @@ -87,6 +88,14 @@ struct aarch64_address_info { enum aarch64_symbol_type symbol_type; }; +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; +}; + /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -103,8 +112,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); -static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *, - int *, unsigned char *, int *, int *); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); static bool aarch64_const_vec_all_same_int_p (rtx, @@ -524,13 +531,15 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, return; } + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + case SYMBOL_SMALL_GOT: { rtx tmp_reg = dest; if (can_create_pseudo_p ()) - { - tmp_reg = gen_reg_rtx (Pmode); - } + tmp_reg = gen_reg_rtx (Pmode); emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm)); emit_insn (gen_ldr_got_small (dest, tmp_reg, imm)); return; @@ -692,6 +701,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src) || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); } +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + /* Split a complex SIMD move. 
*/ void @@ -738,10 +790,10 @@ aarch64_split_simd_move (rtx dst, rtx src) } static rtx -aarch64_force_temporary (rtx x, rtx value) +aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value) { if (can_create_pseudo_p ()) - return force_reg (Pmode, value); + return force_reg (mode, value); else { x = aarch64_emit_move (x, value); @@ -753,15 +805,16 @@ aarch64_force_temporary (rtx x, rtx value) static rtx aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) { - if (!aarch64_plus_immediate (GEN_INT (offset), DImode)) + if (!aarch64_plus_immediate (GEN_INT (offset), mode)) { rtx high; /* Load the full offset into a register. This might be improvable in the future. */ high = GEN_INT (offset); offset = 0; - high = aarch64_force_temporary (temp, high); - reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); + high = aarch64_force_temporary (mode, temp, high); + reg = aarch64_force_temporary (mode, temp, + gen_rtx_PLUS (mode, high, reg)); } return plus_constant (mode, reg, offset); } @@ -800,7 +853,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) && targetm.cannot_force_const_mem (mode, imm)) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -817,7 +870,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) if (offset != const0_rtx) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -826,6 +879,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) case SYMBOL_SMALL_TPREL: case SYMBOL_SMALL_ABSOLUTE: + case SYMBOL_TINY_ABSOLUTE: aarch64_load_symref_appropriately (dest, imm, sty); return; @@ -2634,12 +2688,14 @@ static bool aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; + if (GET_CODE (x) == HIGH) return true; split_const (x, &base, &offset); if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) - return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); return aarch64_tls_referenced_p (x); } @@ -3077,10 +3133,13 @@ aarch64_symbolic_address_p (rtx x) /* Classify the base of symbolic expression X, given that X appears in context CONTEXT. */ -static enum aarch64_symbol_type -aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context) + +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx x, + enum aarch64_symbol_context context) { rtx offset; + split_const (x, &x, &offset); return aarch64_classify_symbol (x, context); } @@ -3428,13 +3487,13 @@ aarch64_print_operand (FILE *f, rtx x, char code) break; case 'X': - /* Print integer constant in hex. */ + /* Print bottom 16 bits of integer constant in hex. */ if (GET_CODE (x) != CONST_INT) { output_operand_lossage ("invalid operand for '%%%c'", code); return; } - asm_fprintf (f, "0x%wx", UINTVAL (x)); + asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff); break; case 'w': @@ -5016,6 +5075,7 @@ aarch64_classify_tls_symbol (rtx x) /* Return the method that should be used to access SYMBOL_REF or LABEL_REF X in context CONTEXT. 
*/ + enum aarch64_symbol_type aarch64_classify_symbol (rtx x, enum aarch64_symbol_context context ATTRIBUTE_UNUSED) @@ -5029,6 +5089,8 @@ aarch64_classify_symbol (rtx x, case AARCH64_CMODEL_TINY_PIC: case AARCH64_CMODEL_TINY: + return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; @@ -5038,70 +5100,46 @@ aarch64_classify_symbol (rtx x, } } - gcc_assert (GET_CODE (x) == SYMBOL_REF); - - switch (aarch64_cmodel) + if (GET_CODE (x) == SYMBOL_REF) { - case AARCH64_CMODEL_LARGE: - return SYMBOL_FORCE_TO_MEM; - - case AARCH64_CMODEL_TINY: - case AARCH64_CMODEL_SMALL: - - /* This is needed to get DFmode, TImode constants to be loaded off - the constant pool. Is it necessary to dump TImode values into - the constant pool. We don't handle TImode constant loads properly - yet and hence need to use the constant pool. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (aarch64_cmodel == AARCH64_CMODEL_LARGE + || CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_FORCE_TO_MEM; if (aarch64_tls_symbol_p (x)) return aarch64_classify_tls_symbol (x); - if (SYMBOL_REF_WEAK (x)) - return SYMBOL_FORCE_TO_MEM; - - return SYMBOL_SMALL_ABSOLUTE; - - case AARCH64_CMODEL_TINY_PIC: - case AARCH64_CMODEL_SMALL_PIC: - - if (CONSTANT_POOL_ADDRESS_P (x)) - return SYMBOL_FORCE_TO_MEM; + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; - if (aarch64_tls_symbol_p (x)) - return aarch64_classify_tls_symbol (x); + case AARCH64_CMODEL_SMALL: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; - if (!aarch64_symbol_binds_local_p (x)) - return SYMBOL_SMALL_GOT; + case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_TINY_ABSOLUTE; - return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_SMALL_ABSOLUTE; - default: - gcc_unreachable (); + default: + gcc_unreachable (); + } } + /* By default push everything into the constant pool. */ return SYMBOL_FORCE_TO_MEM; } -/* Return true if X is a symbolic constant that can be used in context - CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ - -bool -aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context, - enum aarch64_symbol_type *symbol_type) -{ - rtx offset; - split_const (x, &x, &offset); - if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) - *symbol_type = aarch64_classify_symbol (x, context); - else - return false; - - /* No checking of offset at this point. */ - return true; -} - bool aarch64_constant_address_p (rtx x) { @@ -5152,8 +5190,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) /* This could probably go away because we now decompose CONST_INTs according to expand_mov_immediate. */ if ((GET_CODE (x) == CONST_VECTOR - && aarch64_simd_valid_immediate (x, mode, false, - NULL, NULL, NULL, NULL, NULL) != -1) + && aarch64_simd_valid_immediate (x, mode, false, NULL)) || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) return !targetm.cannot_force_const_mem (mode, x); @@ -5984,32 +6021,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) return false; } -/* Return quad mode as the preferred SIMD mode. */ +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. 
*/ static enum machine_mode -aarch64_preferred_simd_mode (enum machine_mode mode) +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) { + gcc_assert (width == 64 || width == 128); if (TARGET_SIMD) - switch (mode) - { - case DFmode: - return V2DFmode; - case SFmode: - return V4SFmode; - case SImode: - return V4SImode; - case HImode: - return V8HImode; - case QImode: - return V16QImode; - case DImode: - return V2DImode; - break; - - default:; - } + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } return word_mode; } +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */ static unsigned int @@ -6097,7 +6159,7 @@ aarch64_mangle_type (const_tree type) } /* Return the equivalent letter for size. */ -static unsigned char +static char sizetochar (int size) { switch (size) @@ -6144,15 +6206,10 @@ aarch64_vect_float_const_representable_p (rtx x) return aarch64_float_const_representable_p (x0); } -/* TODO: This function returns values similar to those - returned by neon_valid_immediate in gcc/config/arm/arm.c - but the API here is different enough that these magic numbers - are not used. It should be sufficient to return true or false. */ -static int -aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) +/* Return true for valid and false for invalid. */ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -6163,7 +6220,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, { \ immtype = (CLASS); \ elsize = (ELSIZE); \ - elchar = sizetochar (elsize); \ eshift = (SHIFT); \ emvn = (NEG); \ break; \ @@ -6172,36 +6228,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned char bytes[16]; - unsigned char elchar = 0; int immtype = -1, matches; unsigned int invmask = inverse ? 0xff : 0; int eshift, emvn; if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); - int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); - - if (!(simd_imm_zero - || aarch64_vect_float_const_representable_p (op))) - return -1; - - if (modconst) - *modconst = CONST_VECTOR_ELT (op, 0); - - if (elementwidth) - *elementwidth = elem_width; - - if (elementchar) - *elementchar = sizetochar (elem_width); + if (! 
(aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; - if (shift) - *shift = 0; + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } - if (simd_imm_zero) - return 19; - else - return 18; + return true; } /* Splat vector constant out into a byte vector. */ @@ -6299,23 +6344,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, if (immtype == -1 || (immtype >= 12 && immtype <= 15) || immtype == 18) - return -1; - - - if (elementwidth) - *elementwidth = elsize; - - if (elementchar) - *elementchar = elchar; - - if (mvn) - *mvn = emvn; - - if (shift) - *shift = eshift; + return false; - if (modconst) + if (info) { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + unsigned HOST_WIDE_INT imm = 0; /* Un-invert bytes of recognized vector, if necessary. */ @@ -6332,68 +6368,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); - *modconst = GEN_INT (imm); - } - else - { - unsigned HOST_WIDE_INT imm = 0; - for (i = 0; i < elsize / BITS_PER_UNIT; i++) - imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); /* Construct 'abcdefgh' because the assembler cannot handle - generic constants. */ - gcc_assert (shift != NULL && mvn != NULL); - if (*mvn) + generic constants. */ + if (info->mvn) imm = ~imm; - imm = (imm >> *shift) & 0xff; - *modconst = GEN_INT (imm); - } + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } } - return immtype; + return true; #undef CHECK } -/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction - (or, implicitly, MVNI) immediate. Write back width per element - to *ELEMENTWIDTH, and a modified constant (whatever should be output - for a MOVI instruction) in *MODCONST. */ -int -aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) -{ - rtx tmpconst; - int tmpwidth; - unsigned char tmpwidthc; - int tmpmvn = 0, tmpshift = 0; - int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, - &tmpwidth, &tmpwidthc, - &tmpmvn, &tmpshift); - - if (retval == -1) - return 0; - - if (modconst) - *modconst = tmpconst; - - if (elementwidth) - *elementwidth = tmpwidth; - - if (elementchar) - *elementchar = tmpwidthc; - - if (mvn) - *mvn = tmpmvn; - - if (shift) - *shift = tmpshift; - - return 1; -} - static bool aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT minval, @@ -6455,6 +6450,25 @@ aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED) return true; } +bool +aarch64_mov_operand_p (rtx x, + enum aarch64_symbol_context context, + enum machine_mode mode) +{ + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) + return true; + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + + return aarch64_classify_symbolic_expression (x, context) + == SYMBOL_TINY_ABSOLUTE; +} + /* Return a const_int vector of VAL. 
*/ rtx aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) @@ -6479,9 +6493,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) gcc_assert (!VECTOR_MODE_P (mode)); vmode = aarch64_preferred_simd_mode (mode); rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); - int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0, - NULL, NULL, NULL, NULL); - return retval; + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); } /* Construct and return a PARALLEL RTX vector. */ @@ -6709,8 +6721,7 @@ aarch64_simd_make_constant (rtx vals) gcc_unreachable (); if (const_vec != NULL_RTX - && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, - NULL, NULL, NULL)) + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) /* Load using MOVI/MVNI. */ return const_vec; else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) @@ -7268,49 +7279,78 @@ aarch64_float_const_representable_p (rtx x) } char* -aarch64_output_simd_mov_immediate (rtx *const_vector, +aarch64_output_simd_mov_immediate (rtx const_vector, enum machine_mode mode, unsigned width) { - int is_valid; - unsigned char widthc; - int lane_width_bits; + bool is_valid; static char templ[40]; - int shift = 0, mvn = 0; const char *mnemonic; unsigned int lane_count = 0; + char element_char; + + struct simd_immediate_info info; - is_valid = - aarch64_simd_immediate_valid_for_move (*const_vector, mode, - const_vector, &lane_width_bits, - &widthc, &mvn, &shift); + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); gcc_assert (is_valid); + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + mode = GET_MODE_INNER (mode); if (mode == SFmode || mode == DFmode) { - bool zero_p = - aarch64_float_const_zero_rtx_p (*const_vector); - gcc_assert (shift == 0); - mnemonic = zero_p ? "movi" : "fmov"; + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } } - else - mnemonic = mvn ? "mvni" : "movi"; - gcc_assert (lane_width_bits != 0); - lane_count = width / lane_width_bits; + mnemonic = info.mvn ? 
"mvni" : "movi"; if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); - else if (shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", - mnemonic, lane_count, widthc, shift); + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", lsl %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), info.shift); else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", - mnemonic, lane_count, widthc); + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); return templ; } +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + /* Split operands into moves from op[1] + op[2] into op[0]. */ void diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b27bcdaa97c..e88e5be894e 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -774,17 +774,34 @@ (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))] "(register_operand (operands[0], <MODE>mode) || aarch64_reg_or_zero (operands[1], <MODE>mode))" - "@ - mov\\t%w0, %w1 - mov\\t%w0, %1 - movi\\t%0.<Vallxd>, %1 - ldr<size>\\t%w0, %1 - ldr\\t%<size>0, %1 - str<size>\\t%w1, %0 - str\\t%<size>1, %0 - umov\\t%w0, %1.<v>[0] - dup\\t%0.<Vallxd>, %w1 - dup\\t%0, %1.<v>[0]" +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + <MODE>mode); + case 3: + return "ldr<size>\t%w0, %1"; + case 4: + return "ldr\t%<size>0, %1"; + case 5: + return "str<size>\t%w1, %0"; + case 6: + return "str\t%<size>1, %0"; + case 7: + return "umov\t%w0, %1.<v>[0]"; + case 8: + return "dup\t%0.<Vallxd>, %w1"; + case 9: + return "dup\t%0, %1.<v>[0]"; + default: + gcc_unreachable (); + } +} [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") @@ -829,8 +846,8 @@ ) (define_insn "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r, r, *w, r,*w,w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,Usa,Ush,rZ,*w,*w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -850,7 +867,8 @@ movi\\t%d0, %1" [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") - (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")] + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm<mode>" @@ -858,9 +876,8 @@ (const_int 16) (match_operand:GPI 1 "const_int_operand" "n")) (match_operand:GPI 2 "const_int_operand" "n"))] - "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) 
- && INTVAL (operands[1]) % 16 == 0 - && UINTVAL (operands[2]) <= 0xffff" + "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) + && UINTVAL (operands[1]) % 16 == 0" "movk\\t%<w>0, %X2, lsl %1" [(set_attr "v8type" "movk") (set_attr "mode" "<MODE>")] @@ -3164,6 +3181,50 @@ (set_attr "mode" "<MODE>")] ) +;; Bitfield Insert (insv) +(define_expand "insv<mode>" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPI 3 "general_operand"))] + "" +{ + unsigned HOST_WIDE_INT width = UINTVAL (operands[1]); + unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]); + rtx value = operands[3]; + + if (width == 0 || (pos + width) > GET_MODE_BITSIZE (<MODE>mode)) + FAIL; + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1; + + /* Prefer AND/OR for inserting all zeros or all ones. */ + if ((UINTVAL (value) & mask) == 0 + || (UINTVAL (value) & mask) == mask) + FAIL; + + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (<MODE>mode, value); +}) + +(define_insn "*insv_reg<mode>" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (<MODE>mode)))" + "bfi\\t%<w>0, %<w>3, %2, %1" + [(set_attr "v8type" "bfm") + (set_attr "mode" "<MODE>")] +) + (define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (ashift:GPI (ANY_EXTEND:GPI diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 608db35b3dd..760ba3dc1e1 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -20234,49 +20234,49 @@ vcvtpq_u64_f64 (float64x2_t __a) __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) vdupb_lane_s8 (int8x16_t a, int const b) { - return __builtin_aarch64_dup_laneqi (a, b); + return __builtin_aarch64_dup_lane_scalarv16qi (a, b); } __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vdupb_lane_u8 (uint8x16_t a, int const b) { - return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b); + return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b); } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) vduph_lane_s16 (int16x8_t a, int const b) { - return __builtin_aarch64_dup_lanehi (a, b); + return __builtin_aarch64_dup_lane_scalarv8hi (a, b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vduph_lane_u16 (uint16x8_t a, int const b) { - return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b); + return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b); } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vdups_lane_s32 (int32x4_t a, int const b) { - return __builtin_aarch64_dup_lanesi (a, b); + return __builtin_aarch64_dup_lane_scalarv4si (a, b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vdups_lane_u32 (uint32x4_t a, int const b) { - return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b); + return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b); } __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) vdupd_lane_s64 (int64x2_t a, int const b) { - return __builtin_aarch64_dup_lanedi (a, b); + return __builtin_aarch64_dup_lane_scalarv2di (a, b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vdupd_lane_u64 (uint64x2_t a, int const b) { - return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b); + return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b); } /* vldn */ diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 18ac16a3160..7cafc08fdd9 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -75,11 +75,6 @@ "Integer constant zero." (match_test "op == const0_rtx")) -(define_constraint "Usa" - "A constraint that matches an absolute symbolic address." - (and (match_code "const,symbol_ref") - (match_test "aarch64_symbolic_address_p (op)"))) - (define_constraint "Ush" "A constraint that matches an absolute symbolic address high part." (and (match_code "high") @@ -148,9 +143,8 @@ "@internal A constraint that matches vector of immediates." (and (match_code "const_vector") - (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op), - NULL, NULL, NULL, - NULL, NULL) != 0"))) + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) (define_constraint "Dh" "@internal diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 860d4d9a187..8e40c5de5d4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -385,7 +385,8 @@ ;; Double modes of vector modes (lower case). (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") (V2SI "v4si") (V2SF "v4sf") - (SI "v2si") (DI "v2di")]) + (SI "v2si") (DI "v2di") + (DF "v2df")]) ;; Narrowed modes for VDN. 
(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 8514e8f8fbd..3e2b6b34357 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -115,16 +115,11 @@ (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, 0)"))) -(define_predicate "aarch64_const_address" - (and (match_code "symbol_ref") - (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)"))) - (define_predicate "aarch64_valid_symref" (match_code "const, symbol_ref, label_ref") { - enum aarch64_symbol_type symbol_type; - return (aarch64_symbolic_constant_p (op, SYMBOL_CONTEXT_ADR, &symbol_type) - && symbol_type != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); }) (define_predicate "aarch64_tls_ie_symref" @@ -170,15 +165,10 @@ }) (define_predicate "aarch64_mov_operand" - (and (match_code "reg,subreg,mem,const_int,symbol_ref,high") + (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high") (ior (match_operand 0 "register_operand") (ior (match_operand 0 "memory_operand") - (ior (match_test "GET_CODE (op) == HIGH - && aarch64_valid_symref (XEXP (op, 0), - GET_MODE (XEXP (op, 0)))") - (ior (match_test "CONST_INT_P (op) - && aarch64_move_imm (INTVAL (op), mode)") - (match_test "aarch64_const_address (op, mode)"))))))) + (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)"))))) (define_predicate "aarch64_movti_operand" (and (match_code "reg,subreg,mem,const_int") diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 69007a93385..5f5b33e347b 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) break; case GE: case GT: case GEU: case GTU: - /* These must be swapped. */ - if (op1 != CONST0_RTX (cmp_mode)) - { - code = swap_condition (code); - tem = op0, op0 = op1, op1 = tem; - } + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; break; default: @@ -3067,12 +3067,9 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) operands[1] = op1; out = gen_reg_rtx (DImode); - /* What's actually returned is -1,0,1, not a proper boolean value, - so use an EXPR_LIST as with a generic libcall instead of a - comparison type expression. */ - note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX); - note = gen_rtx_EXPR_LIST (VOIDmode, op0, note); - note = gen_rtx_EXPR_LIST (VOIDmode, func, note); + /* What's actually returned is -1,0,1, not a proper boolean value. 
*/ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); alpha_emit_xfloating_libcall (func, out, operands, 2, note); return out; diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 439752780a0..b020b457df2 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -23,6 +23,7 @@ ;; Uses of UNSPEC in this file: (define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE UNSPEC_ARG_HOME UNSPEC_LDGP1 UNSPEC_INSXH diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md index 10da396ab66..12bbbaf9083 100644 --- a/gcc/config/arm/arm-fixed.md +++ b/gcc/config/arm/arm-fixed.md @@ -19,12 +19,13 @@ ;; This file contains ARM instructions that support fixed-point operations. (define_insn "add<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "add%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "add<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -32,7 +33,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "usadd<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -40,7 +42,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ssadd<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -48,15 +51,17 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qadd<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sub<mode>3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "sub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "sub<mode>3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -64,7 +69,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "ssub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ussub<mode>3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -73,7 +79,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sssub<mode>3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ 
-81,7 +88,8 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qsub<qaddsub_suf>%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Fractional multiplies. @@ -374,6 +382,7 @@ "TARGET_32BIT && arm_arch6" "ssat%?\\t%0, #16, %2%S1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat") (set_attr "shift" "1") (set_attr "type" "alu_shift")]) @@ -384,4 +393,5 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "sat")]) diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md index 9705f751ae6..8a3335055d1 100644 --- a/gcc/config/arm/arm-generic.md +++ b/gcc/config/arm/arm-generic.md @@ -114,7 +114,9 @@ (define_insn_reservation "mult" 16 (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))) + (and (eq_attr "ldsched" "no") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "core*16") (define_insn_reservation "mult_ldsched_strongarm" 3 @@ -122,7 +124,8 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) "core*2") (define_insn_reservation "mult_ldsched" 4 @@ -130,13 +133,17 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "!strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) "core*4") (define_insn_reservation "multi_cycle" 32 (and (eq_attr "generic_sched" "yes") (and (eq_attr "core_cycles" "multi") - (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4"))) + (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\ + store1,store2,store3,store4") + (not (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))))) "core*32") (define_insn_reservation "single_cycle" 1 diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml index 2bc9702bee2..e615437b125 100644 --- a/gcc/config/arm/arm-ldmstm.ml +++ b/gcc/config/arm/arm-ldmstm.ml @@ -146,12 +146,15 @@ let can_thumb addrmode update is_store = | IA, true, true -> true | _ -> false +exception InvalidAddrMode of string;; + let target addrmode thumb = match addrmode, thumb with IA, true -> "TARGET_THUMB1" | IA, false -> "TARGET_32BIT" | DB, false -> "TARGET_32BIT" | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = let astr = string_of_addrmode addrmode in @@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = done; Printf.printf "}\"\n"; Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; - begin if not thumb then + if not thumb then begin Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; end; Printf.printf "])\n\n" diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index c791341f69b..ef94bbcea25 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -24,12 +24,13 @@ extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); extern int use_return_insn (int, rtx); +extern bool use_simple_return_p (void); extern enum reg_class arm_regno_class (int); 
extern void arm_load_pic_register (unsigned long); extern int arm_volatile_func (void); extern void arm_expand_prologue (void); extern void arm_expand_epilogue (bool); -extern void thumb2_expand_return (void); +extern void thumb2_expand_return (bool); extern const char *arm_strip_name_encoding (const char *); extern void arm_asm_output_labelref (FILE *, const char *); extern void thumb2_asm_output_opcode (FILE *); @@ -94,7 +95,7 @@ extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, extern bool arm_tls_referenced_p (rtx); extern int arm_coproc_mem_operand (rtx, bool); -extern int neon_vector_mem_operand (rtx, int); +extern int neon_vector_mem_operand (rtx, int, bool); extern int neon_struct_mem_operand (rtx); extern int arm_no_early_store_addr_dep (rtx, rtx); extern int arm_early_store_addr_dep (rtx, rtx); @@ -227,6 +228,8 @@ extern const char *arm_mangle_type (const_tree); extern void arm_order_regs_for_local_alloc (void); +extern int arm_max_conditional_execute (); + /* Vectorizer cost model implementation. */ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding @@ -256,8 +259,7 @@ struct tune_params bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); int constant_limit; - /* Maximum number of instructions to conditionalise in - arm_final_prescan_insn. */ + /* Maximum number of instructions to conditionalise. */ int max_insns_skipped; int num_prefetch_slots; int l1_cache_size; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 44286926eb6..e6fd42079cb 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23,6 +23,7 @@ #include "config.h" #include "system.h" #include "coretypes.h" +#include "hash-table.h" #include "tm.h" #include "rtl.h" #include "tree.h" @@ -661,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_ASAN_SHADOW_OFFSET #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -1054,7 +1059,7 @@ const struct tune_params arm_cortex_a15_tune = arm_9e_rtx_costs, NULL, 1, /* Constant limit. */ - 5, /* Max cond insns. */ + 2, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, @@ -1870,6 +1875,11 @@ arm_option_override (void) arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + + if (!TARGET_THUMB2) + arm_restrict_it = 0; /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ @@ -2168,6 +2178,14 @@ arm_option_override (void) global_options.x_param_values, global_options_set.x_param_values); + /* Disable shrink-wrap when optimizing function for size, since it tends to + generate additional returns. */ + if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) + flag_shrink_wrap = false; + /* TBD: Dwarf info for apcs frame is not handled yet. */ + if (TARGET_APCS_FRAME) + flag_shrink_wrap = false; + /* Register global variables with the garbage collector. */ arm_add_gc_roots (); } @@ -2517,6 +2535,18 @@ use_return_insn (int iscond, rtx sibling) return 1; } +/* Return TRUE if we should try to use a simple_return insn, i.e. 
perform + shrink-wrapping if possible. This is the case if we need to emit a + prologue, which we can test by looking at the offsets. */ +bool +use_simple_return_p (void) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + return offsets->outgoing_args != 0; +} + /* Return TRUE if int I is a valid immediate ARM constant. */ int @@ -2656,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) switch (code) { case AND: + case IOR: + case XOR: return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); case PLUS: @@ -3816,36 +3848,48 @@ arm_function_value(const_tree type, const_tree func, return arm_libcall_value_1 (mode); } -static int -libcall_eq (const void *p1, const void *p2) +/* libcall hashtable helpers. */ + +struct libcall_hasher : typed_noop_remove <rtx_def> +{ + typedef rtx_def value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline bool +libcall_hasher::equal (const value_type *p1, const compare_type *p2) { - return rtx_equal_p ((const_rtx) p1, (const_rtx) p2); + return rtx_equal_p (p1, p2); } -static hashval_t -libcall_hash (const void *p1) +inline hashval_t +libcall_hasher::hash (const value_type *p1) { - return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE); + return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE); } +typedef hash_table <libcall_hasher> libcall_table_type; + static void -add_libcall (htab_t htab, rtx libcall) +add_libcall (libcall_table_type htab, rtx libcall) { - *htab_find_slot (htab, libcall, INSERT) = libcall; + *htab.find_slot (libcall, INSERT) = libcall; } static bool arm_libcall_uses_aapcs_base (const_rtx libcall) { static bool init_done = false; - static htab_t libcall_htab; + static libcall_table_type libcall_htab; if (!init_done) { init_done = true; - libcall_htab = htab_create (31, libcall_hash, libcall_eq, - NULL); + libcall_htab.create (31); add_libcall (libcall_htab, convert_optab_libfunc (sfloat_optab, SFmode, SImode)); add_libcall (libcall_htab, @@ -3904,7 +3948,7 @@ arm_libcall_uses_aapcs_base (const_rtx libcall) DFmode)); } - return libcall && htab_find (libcall_htab, libcall) != NULL; + return libcall && libcall_htab.find (libcall) != NULL; } static rtx @@ -7819,7 +7863,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) && GET_CODE (SET_SRC (x)) == VEC_SELECT) { *total = rtx_cost (SET_DEST (x), code, 0, speed); - if (!neon_vector_mem_operand (SET_DEST (x), 2)) + if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) *total += COSTS_N_INSNS (1); return true; } @@ -7830,7 +7874,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) { rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); *total = rtx_cost (mem, code, 0, speed); - if (!neon_vector_mem_operand (mem, 2)) + if (!neon_vector_mem_operand (mem, 2, true)) *total += COSTS_N_INSNS (1); return true; } @@ -9101,6 +9145,12 @@ arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) return cost; } +int +arm_max_conditional_execute (void) +{ + return max_insns_skipped; +} + static int arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) { @@ -10002,7 +10052,7 @@ arm_coproc_mem_operand (rtx op, bool wb) 2 - Element/structure loads (vld1) */ int -neon_vector_mem_operand (rtx op, int type) +neon_vector_mem_operand (rtx op, int type, bool strict) { rtx ind; @@ -10014,7 
+10064,7 @@ neon_vector_mem_operand (rtx op, int type) || reg_mentioned_p (virtual_outgoing_args_rtx, op) || reg_mentioned_p (virtual_stack_dynamic_rtx, op) || reg_mentioned_p (virtual_stack_vars_rtx, op))) - return FALSE; + return !strict; /* Constants are converted into offsets from labels. */ if (!MEM_P (op)) @@ -10124,7 +10174,7 @@ coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) { if (!TARGET_NEON_FP16) return GENERAL_REGS; - if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) return NO_REGS; return GENERAL_REGS; } @@ -16135,25 +16185,34 @@ arm_compute_save_reg0_reg12_mask (void) return save_reg_mask; } +/* Return true if r3 is live at the start of the function. */ + +static bool +arm_r3_live_at_start_p (void) +{ + /* Just look at cfg info, which is still close enough to correct at this + point. This gives false positives for broken functions that might use + uninitialized data that happens to be allocated in r3, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3); +} /* Compute the number of bytes used to store the static chain register on the - stack, above the stack frame. We need to know this accurately to get the - alignment of the rest of the stack frame correct. */ + stack, above the stack frame. We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ -static int arm_compute_static_chain_stack_bytes (void) +static int +arm_compute_static_chain_stack_bytes (void) { - unsigned long func_type = arm_current_func_type (); - int static_chain_stack_bytes = 0; - - if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM && - IS_NESTED (func_type) && - df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0) - static_chain_stack_bytes = 4; + /* See the defining assertion in arm_expand_prologue. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM + && IS_NESTED (arm_current_func_type ()) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) + return 4; - return static_chain_stack_bytes; + return 0; } - /* Compute a bit mask of which registers need to be saved on the stack for the current function. This is used by arm_get_frame_offsets, which may add extra registers. */ @@ -17122,6 +17181,19 @@ emit_multi_reg_push (unsigned long mask) return par; } +/* Add a REG_CFA_ADJUST_CFA REG note to INSN. + SIZE is the offset to be adjusted. + DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */ +static void +arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src) +{ + rtx dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf); +} + /* Generate and emit an insn pattern that we will recognize as a pop_multi. SAVED_REGS_MASK shows which registers need to be restored. 
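The const_ok_for_dimode_op hunk above extends the 64-bit constant test from AND to IOR and XOR: a DImode constant qualifies when each 32-bit half is either a valid immediate for the 32-bit opcode or 0xFFFFFFFF. A minimal standalone C sketch of that split, approximating const_ok_for_op with the basic ARM rotated-immediate test (the real helper also accepts inverted forms handled by BIC and friends):

#include <stdint.h>

/* An ARM data-processing immediate is an 8-bit value rotated right by an
   even amount.  */
static int
arm_dp_immediate_p (uint32_t val)
{
  for (unsigned rot = 0; rot < 32; rot += 2)
    {
      /* Undo a rotate-right by ROT; if the result fits in 8 bits, VAL is
         encodable.  */
      uint32_t undone = rot ? (val << rot) | (val >> (32 - rot)) : val;
      if (undone <= 0xff)
        return 1;
    }
  return 0;
}

/* Each 32-bit half must be an encodable immediate or all-ones, mirroring
   the AND/IOR/XOR case in const_ok_for_dimode_op.  */
static int
dimode_logic_const_ok_p (uint64_t i)
{
  uint32_t lo = (uint32_t) i;
  uint32_t hi = (uint32_t) (i >> 32);
  return (arm_dp_immediate_p (hi) || hi == 0xffffffffu)
         && (arm_dp_immediate_p (lo) || lo == 0xffffffffu);
}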
@@ -17212,6 +17284,9 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask) par = emit_insn (par); REG_NOTES (par) = dwarf; + if (!return_in_pc) + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, + stack_pointer_rtx, stack_pointer_rtx); } /* Generate and emit an insn pattern that we will recognize as a pop_multi @@ -17282,6 +17357,9 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) par = emit_insn (par); REG_NOTES (par) = dwarf; + + arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs, + base_reg, base_reg); } /* Generate and emit a pattern that will be recognized as LDRD pattern. If even @@ -17357,6 +17435,7 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) pattern can be emitted now. */ par = emit_insn (par); REG_NOTES (par) = dwarf; + RTX_FRAME_RELATED_P (par) = 1; } i++; @@ -17373,7 +17452,12 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, 4 * i)); RTX_FRAME_RELATED_P (tmp) = 1; - emit_insn (tmp); + tmp = emit_insn (tmp); + if (!return_in_pc) + { + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i, + stack_pointer_rtx, stack_pointer_rtx); + } dwarf = NULL_RTX; @@ -17407,9 +17491,11 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) else { par = emit_insn (tmp); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } - REG_NOTES (par) = dwarf; } else if ((num_regs % 2) == 1 && return_in_pc) { @@ -17568,11 +17654,27 @@ thumb_force_lr_save (void) || df_regs_ever_live_p (LR_REGNUM)); } +/* We do not know if r3 will be available because + we do have an indirect tailcall happening in this + particular case. */ +static bool +is_indirect_tailcall_p (rtx call) +{ + rtx pat = PATTERN (call); + + /* Indirect tail call. */ + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + + pat = XEXP (XEXP (pat, 0), 0); + return REG_P (pat); +} /* Return true if r3 is used by any of the tail call insns in the current function. */ static bool -any_sibcall_uses_r3 (void) +any_sibcall_could_use_r3 (void) { edge_iterator ei; edge e; @@ -17586,7 +17688,8 @@ any_sibcall_uses_r3 (void) if (!CALL_P (call)) call = prev_nonnote_nondebug_insn (call); gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); - if (find_regno_fusage (call, USE, 3)) + if (find_regno_fusage (call, USE, 3) + || is_indirect_tailcall_p (call)) return true; } return false; @@ -17753,7 +17856,7 @@ arm_get_frame_offsets (void) /* If it is safe to use r3, then do so. This sometimes generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ - if (! any_sibcall_uses_r3 () + if (! any_sibcall_could_use_r3 () && arm_size_return_regs () <= 12 && (offsets->saved_regs_mask & (1 << 3)) == 0 && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd)) @@ -18064,16 +18167,16 @@ arm_expand_prologue (void) } else if (IS_NESTED (func_type)) { - /* The Static chain register is the same as the IP register + /* The static chain register is the same as the IP register used as a scratch register during stack frame creation. To get around this need to find somewhere to store IP whilst the frame is being created. We try the following places in order: - 1. The last argument register. + 1. The last argument register r3. 2. A slot on the stack above the frame. (This only works if the function is not a varargs function). - 3. Register r3, after pushing the argument registers + 3. 
Register r3 again, after pushing the argument registers onto the stack. Note - we only need to tell the dwarf2 backend about the SP @@ -18081,7 +18184,7 @@ arm_expand_prologue (void) doesn't need to be unwound, as it doesn't contain a value inherited from the caller. */ - if (df_regs_ever_live_p (3) == false) + if (!arm_r3_live_at_start_p ()) insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); else if (args_to_push == 0) { @@ -18222,8 +18325,7 @@ arm_expand_prologue (void) if (IS_NESTED (func_type)) { /* Recover the static chain register. */ - if (!df_regs_ever_live_p (3) - || saved_pretend_args) + if (!arm_r3_live_at_start_p () || saved_pretend_args) insn = gen_rtx_REG (SImode, 3); else /* if (crtl->args.pretend_args_size == 0) */ { @@ -19471,6 +19573,13 @@ thumb2_final_prescan_insn (rtx insn) enum arm_cond_code code; int n; int mask; + int max; + + /* Maximum number of conditionally executed instructions in a block + is minimum of the two max values: maximum allowed in an IT block + and maximum that is beneficial according to the cost model and tune. */ + max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ? + max_insns_skipped : MAX_INSN_PER_IT_BLOCK; /* Remove the previous insn from the count of insns to be output. */ if (arm_condexec_count) @@ -19513,9 +19622,9 @@ thumb2_final_prescan_insn (rtx insn) /* ??? Recognize conditional jumps, and combine them with IT blocks. */ if (GET_CODE (body) != COND_EXEC) break; - /* Allow up to 4 conditionally executed instructions in a block. */ + /* Maximum number of conditionally executed instructions in a block. */ n = get_attr_ce_count (insn); - if (arm_condexec_masklen + n > 4) + if (arm_condexec_masklen + n > max) break; predicate = COND_EXEC_TEST (body); @@ -23978,7 +24087,7 @@ thumb1_expand_prologue (void) all we really need to check here is if single register is to be returned, or multiple register return. */ void -thumb2_expand_return (void) +thumb2_expand_return (bool simple_return) { int i, num_regs; unsigned long saved_regs_mask; @@ -23991,7 +24100,7 @@ thumb2_expand_return (void) if (saved_regs_mask & (1 << i)) num_regs++; - if (saved_regs_mask) + if (!simple_return && saved_regs_mask) { if (num_regs == 1) { @@ -24269,6 +24378,7 @@ arm_expand_epilogue (bool really_return) if (frame_pointer_needed) { + rtx insn; /* Restore stack pointer if necessary. */ if (TARGET_ARM) { @@ -24279,9 +24389,12 @@ arm_expand_epilogue (bool really_return) /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ @@ -24291,16 +24404,25 @@ arm_expand_epilogue (bool really_return) { /* In Thumb-2 mode, the frame pointer points to the last saved register. 
*/ - amount = offsets->locals_base - offsets->saved_regs; - if (amount) - emit_insn (gen_addsi3 (hard_frame_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + amount = offsets->locals_base - offsets->saved_regs; + if (amount) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + hard_frame_pointer_rtx, + hard_frame_pointer_rtx); + } /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + insn = emit_insn (gen_movsi (stack_pointer_rtx, + hard_frame_pointer_rtx)); + arm_add_cfa_adjust_cfa_note (insn, 0, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -24313,12 +24435,15 @@ arm_expand_epilogue (bool really_return) amount = offsets->outgoing_args - offsets->saved_regs; if (amount) { + rtx tmp; /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (amount))); + tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (tmp, amount, + stack_pointer_rtx, stack_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -24371,6 +24496,8 @@ arm_expand_epilogue (bool really_return) REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (V2SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } if (saved_regs_mask) @@ -24418,6 +24545,9 @@ arm_expand_epilogue (bool really_return) REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, + stack_pointer_rtx); } } } @@ -24442,9 +24572,33 @@ arm_expand_epilogue (bool really_return) } if (crtl->args.pretend_args_size) - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (crtl->args.pretend_args_size))); + { + int i, j; + rtx dwarf = NULL_RTX; + rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + + RTX_FRAME_RELATED_P (tmp) = 1; + + if (cfun->machine->uses_anonymous_args) + { + /* Restore pretend args. Refer arm_expand_prologue on how to save + pretend_args in stack. 
*/ + int num_regs = crtl->args.pretend_args_size / 4; + saved_regs_mask = (0xf0 >> num_regs) & 0xf; + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + rtx reg = gen_rtx_REG (SImode, i); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + j++; + } + REG_NOTES (tmp) = dwarf; + } + arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size, + stack_pointer_rtx, stack_pointer_rtx); + } if (!really_return) return; @@ -25861,9 +26015,8 @@ arm_dwarf_register_span (rtx rtl) nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); - regno = (regno - FIRST_VFP_REGNUM) / 2; for (i = 0; i < nregs; i++) - XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i); return p; } @@ -26113,9 +26266,17 @@ arm_unwind_emit (FILE * asm_out_file, rtx insn) handled_one = true; break; + /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P + to get correct dwarf information for shrink-wrap. We should not + emit unwind information for it because these are used either for + pretend arguments or notes to adjust sp and restore registers from + stack. */ + case REG_CFA_ADJUST_CFA: + case REG_CFA_RESTORE: + return; + case REG_CFA_DEF_CFA: case REG_CFA_EXPRESSION: - case REG_CFA_ADJUST_CFA: case REG_CFA_OFFSET: /* ??? Only handling here what we actually emit. */ gcc_unreachable (); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 3a49a90c184..387d2717431 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -183,6 +183,11 @@ extern arm_cc arm_current_cc; #define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) +/* The maximaum number of instructions that is beneficial to + conditionally execute. */ +#undef MAX_CONDITIONAL_EXECUTE +#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute () + extern int arm_target_label; extern int arm_ccfsm_state; extern GTY(()) rtx arm_target_insn; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 4b45c984bf4..c464eddebd4 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -93,6 +93,15 @@ ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. (define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + ;; Operand number of an input operand that is shifted. Zero if the ;; given instruction does not shift one of its input operands. (define_attr "shift" "" (const_int 0)) @@ -103,6 +112,8 @@ (define_attr "fpu" "none,vfp" (const (symbol_ref "arm_fpu_attr"))) +(define_attr "predicated" "yes,no" (const_string "no")) + ; LENGTH of an instruction (in bytes) (define_attr "length" "" (const_int 4)) @@ -190,6 +201,15 @@ (cond [(eq_attr "insn_enabled" "no") (const_string "no") + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -230,53 +250,91 @@ ;; scheduling information. 
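The epilogue hunk above that restores pretend args derives the register set from the number of pushed words with (0xf0 >> num_regs) & 0xf, i.e. the highest-numbered argument registers up to r3. A throwaway C harness (the mask expression is taken from the hunk, the rest is illustrative) makes the mapping visible:

#include <stdio.h>

int
main (void)
{
  /* pretend_args_size is a multiple of 4; each word corresponds to one
     argument register pushed by the prologue.  */
  for (int num_regs = 1; num_regs <= 4; num_regs++)
    {
      unsigned mask = (0xf0 >> num_regs) & 0xf;
      printf ("num_regs=%d mask=0x%x regs:", num_regs, mask);
      for (int i = 0; i < 4; i++)
        if (mask & (1u << i))
          printf (" r%d", i);
      printf ("\n");   /* 1 -> r3, 2 -> r2 r3, 3 -> r1 r2 r3, 4 -> r0 r1 r2 r3 */
    }
  return 0;
}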
(define_attr "insn" - "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other" + "mov,mvn,clz,mrs,msr,xtab,sat,other" (const_string "other")) -; TYPE attribute is used to detect floating point instructions which, if -; running on a co-processor can run in parallel with other, basic instructions -; If write-buffer scheduling is enabled then it can also be used in the -; scheduling of writes. - -; Classification of each insn -; Note: vfp.md has different meanings for some of these, and some further -; types as well. See that file for details. -; simple_alu_imm a simple alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand and has an immediate -; operand. This currently only tracks very basic immediate -; alu operations. -; alu_reg any alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand -; and does not have an immediate operand. This is -; also the default -; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB -; alu_shift any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a constant -; alu_shift_reg any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a register value -; mult a multiply instruction -; block blockage insn, this blocks all functional units -; float a floating point arithmetic operation (subject to expansion) -; fdivd DFmode floating point division -; fdivs SFmode floating point division -; f_load[sd] A single/double load from memory. Used for VFP unit. -; f_store[sd] A single/double store to memory. Used for VFP unit. -; f_flag a transfer of co-processor flags to the CPSR -; f_2_r transfer float to core (no memory needed) -; r_2_f transfer core to float -; f_cvt convert floating<->integral -; branch a branch -; call a subroutine call -; load_byte load byte(s) from memory to arm registers -; load1 load 1 word from memory to arm registers -; load2 load 2 words from memory to arm registers -; load3 load 3 words from memory to arm registers -; load4 load 4 words from memory to arm registers -; store store 1 word to memory from arm registers -; store2 store 2 words -; store3 store 3 words -; store4 store 4 (or more) words +; TYPE attribute is used to classify instructions for use in scheduling. ; +; Instruction classification: +; +; alu_reg any alu instruction that doesn't hit memory or fp +; regs or have a shifted source operand and does not have +; an immediate operand. This is also the default. +; alu_shift any data instruction that doesn't hit memory or fp. +; regs, but has a source operand shifted by a constant. +; alu_shift_reg any data instruction that doesn't hit memory or fp. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; f_2_r transfer from float to core (no memory needed). +; f_cvt conversion between float and integral. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. 
+; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcpys single precision floating point cpy. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmul[d,s] double/single floating point multiply. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov integer move. +; mul integer multiply. +; muls integer multiply, flag setting. +; r_2_f transfer from core to float. +; sdiv signed division. +; simple_alu_imm simple alu instruction that doesn't hit memory or fp +; regs or have a shifted source operand and has an +; immediate operand. This currently only tracks very basic +; immediate alu operations. +; simple_alu_shift simple alu instruction with a shifted source operand. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. +; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. 
(define_attr "type" "simple_alu_imm,\ @@ -284,7 +342,6 @@ simple_alu_shift,\ alu_shift,\ alu_shift_reg,\ - mult,\ block,\ float,\ fdivd,\ @@ -328,18 +385,57 @@ ffarithd,\ fcmps,\ fcmpd,\ - fcpys" - (if_then_else - (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,\ - umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") - (const_string "mult") - (const_string "alu_reg"))) + fcpys,\ + smulxy,\ + smlaxy,\ + smlalxy,\ + smulwy,\ + smlawx,\ + mul,\ + muls,\ + mla,\ + mlas,\ + umull,\ + umulls,\ + umlal,\ + umlals,\ + smull,\ + smulls,\ + smlal,\ + smlals,\ + smlawy,\ + smuad,\ + smuadx,\ + smlad,\ + smladx,\ + smusd,\ + smusdx,\ + smlsd,\ + smlsdx,\ + smmul,\ + smmulr,\ + smmla,\ + umaal,\ + smlald,\ + smlsld,\ + sdiv,\ + udiv" + (const_string "alu_reg")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) ; Is this an (integer side) multiply with a 64-bit result? (define_attr "mul64" "no,yes" (if_then_else - (eq_attr "insn" - "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") (const_string "yes") (const_string "no"))) @@ -1464,18 +1560,21 @@ (match_operand:SI 1 "s_register_operand" "%0,r")))] "TARGET_32BIT && !arm_arch6" "mul%?\\t%0, %2, %1" - [(set_attr "insn" "mul") + [(set_attr "type" "mul") (set_attr "predicable" "yes")] ) (define_insn "*arm_mulsi3_v6" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (mult:SI (match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r") + (match_operand:SI 2 "s_register_operand" "l,0,r")))] "TARGET_32BIT && arm_arch6" "mul%?\\t%0, %1, %2" - [(set_attr "insn" "mul") - (set_attr "predicable" "yes")] + [(set_attr "type" "mul") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,yes,no")] ) ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands @@ -1495,7 +1594,7 @@ return \"mul\\t%0, %2\"; " [(set_attr "length" "4,4,2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*thumb_mulsi3_v6" @@ -1508,7 +1607,7 @@ mul\\t%0, %1 mul\\t%0, %1" [(set_attr "length" "2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0" @@ -1522,7 +1621,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0_v6" @@ -1536,7 +1635,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch" @@ -1549,7 +1648,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch_v6" @@ -1562,7 +1661,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) ;; Unnamed templates to match MLA instruction. 
@@ -1575,7 +1674,7 @@ (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] "TARGET_32BIT && !arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") + [(set_attr "type" "mla") (set_attr "predicable" "yes")] ) @@ -1587,8 +1686,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_32BIT && arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulsi3addsi_compare0" @@ -1605,7 +1705,7 @@ "TARGET_ARM && arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_v6" @@ -1622,7 +1722,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch" @@ -1637,7 +1737,7 @@ "TARGET_ARM && !arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch_v6" @@ -1652,7 +1752,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3subsi" @@ -1663,8 +1763,9 @@ (match_operand:SI 1 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch_thumb2" "mls%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "maddsidi4" @@ -1686,7 +1787,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") + [(set_attr "type" "smlal") (set_attr "predicable" "yes")] ) @@ -1699,8 +1800,9 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; 32x32->64 widening multiply. 
@@ -1725,7 +1827,7 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1736,8 +1838,9 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsidi3" @@ -1756,7 +1859,7 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1767,8 +1870,9 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umaddsidi4" @@ -1790,7 +1894,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") + [(set_attr "type" "umlal") (set_attr "predicable" "yes")] ) @@ -1803,8 +1907,9 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "umlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "smulsi3_highpart" @@ -1832,7 +1937,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1847,8 +1952,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsi3_highpart" @@ -1876,7 +1982,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1891,8 +1997,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "mulhisi3" @@ -1903,7 +2010,7 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smulbb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") + [(set_attr "type" "smulxy") (set_attr "predicable" "yes")] ) @@ -1916,8 +2023,9 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smultb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulhisi3bt" @@ -1929,8 +2037,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smulbt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr 
"predicable_short_it" "no")] ) (define_insn "*mulhisi3tt" @@ -1943,8 +2052,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smultt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhisi4" @@ -1956,8 +2066,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlabb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; Note: there is no maddhisi4ibt because this one is canonical form @@ -1971,8 +2082,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*maddhisi4tt" @@ -1986,22 +2098,24 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatt%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhidi4" [(set (match_operand:DI 0 "s_register_operand" "=r") (plus:DI (mult:DI (sign_extend:DI - (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:HI 1 "s_register_operand" "r")) (sign_extend:DI (match_operand:HI 2 "s_register_operand" "r"))) (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlalbb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Note: there is no maddhidi4ibt because this one is canonical form (define_insn "*maddhidi4tb" @@ -2016,8 +2130,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*maddhidi4tt" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -2033,8 +2148,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltt%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "mulsf3" [(set (match_operand:SF 0 "s_register_operand" "") @@ -2163,29 +2279,28 @@ ) (define_insn_and_split "*anddi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w") - (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0") - (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] "TARGET_32BIT && !TARGET_IWMMXT" { switch (which_alternative) { - case 0: - case 1: + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); case 2: - case 3: /* fall 
through */ - return "#"; - case 4: /* fall through */ - case 8: return "vand\t%P0, %P1, %P2"; + case 3: + case 4: case 5: /* fall through */ - case 9: return neon_output_logic_immediate ("vand", &operands[2], - DImode, 1, VALID_NEON_QREG_MODE (DImode)); - case 6: return "#"; - case 7: return "#"; + return "#"; default: gcc_unreachable (); } } - "TARGET_32BIT && !TARGET_IWMMXT" + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" [(set (match_dup 3) (match_dup 4)) (set (match_dup 5) (match_dup 6))] " @@ -2201,19 +2316,11 @@ gen_highpart_mode (SImode, DImode, operands[2])); }" - [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*, + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "length" "8,8,8,8,*,*,8,8,*,*") - (set (attr "insn_enabled") (if_then_else - (lt (symbol_ref "which_alternative") - (const_int 4)) - (if_then_else (match_test "!TARGET_NEON") - (const_string "yes") - (const_string "no")) - (if_then_else (match_test "TARGET_NEON") - (const_string "yes") - (const_string "no"))))] + (set_attr "length" "*,*,8,8,8,8,*,*") + ] ) (define_insn_and_split "*anddi_zesidi_di" @@ -2399,7 +2506,7 @@ [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (zero_extract:SI (match_operand:SI 0 "s_register_operand" "r") - (match_operand 1 "const_int_operand" "n") + (match_operand 1 "const_int_operand" "n") (match_operand 2 "const_int_operand" "n")) (const_int 0)))] "TARGET_32BIT @@ -2415,6 +2522,7 @@ " [(set_attr "conds" "set") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "simple_alu_imm")] ) @@ -2842,7 +2950,8 @@ "arm_arch_thumb2" "bfc%?\t%0, %2, %1" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "insv_t2" @@ -2853,7 +2962,8 @@ "arm_arch_thumb2" "bfi%?\t%0, %3, %2, %1" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; constants for op 2 will never be given to these patterns. 
@@ -2880,7 +2990,7 @@ [(set_attr "length" "8") (set_attr "predicable" "yes")] ) - + (define_insn_and_split "*anddi_notzesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (zero_extend:DI @@ -2905,9 +3015,10 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI @@ -2928,16 +3039,18 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn "andsi_notsi_si" [(set (match_operand:SI 0 "s_register_operand" "=r") (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_32BIT" "bic%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "thumb1_bicsi3" @@ -2997,14 +3110,47 @@ "" ) -(define_insn "*iordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) (define_insn "*iordi_zesidi_di" @@ -3017,7 +3163,8 @@ orr%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*iordi_sesidi_di" @@ -3137,19 +3284,49 @@ (define_expand "xordi3" [(set (match_operand:DI 0 "s_register_operand" "") (xor:DI (match_operand:DI 1 "s_register_operand" "") - (match_operand:DI 2 "s_register_operand" "")))] + (match_operand:DI 2 "arm_xordi_operand" "")))] "TARGET_32BIT" "" ) -(define_insn "*xordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (xor:DI 
(match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*xordi_zesidi_di" @@ -3162,7 +3339,8 @@ eor%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*xordi_sesidi_di" @@ -3292,7 +3470,8 @@ "" [(set_attr "length" "8") (set_attr "ce_count" "2") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; ??? 
Are these four splitters still beneficial when the Thumb-2 bitfield @@ -3428,7 +3607,8 @@ (const_int 0)))] "TARGET_32BIT" "bic%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*smax_m1" @@ -3437,7 +3617,8 @@ (const_int -1)))] "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn_and_split "*arm_smax_insn" @@ -3485,7 +3666,8 @@ (const_int 0)))] "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn_and_split "*arm_smin_insn" @@ -4160,6 +4342,7 @@ "TARGET_32BIT" "mvn%?\\t%0, %1%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "1") (set_attr "insn" "mvn") (set_attr "arch" "32,a") @@ -4373,6 +4556,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load1")]) (define_insn "unaligned_loadhis" @@ -4385,6 +4569,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_loadhiu" @@ -4397,6 +4582,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_storesi" @@ -4408,6 +4594,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) (define_insn "unaligned_storehi" @@ -4419,6 +4606,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) ;; Unaligned double-word load and store. 
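The *anddi3_insn, *iordi3_insn and *xordi3_insn splitters above lower a core-register DImode logical operation into two independent SImode operations on the low and high words (gen_lowpart/gen_highpart plus simplify_gen_binary). The identity they rely on is easy to check in standalone C; XOR is shown, AND and IOR split identically:

#include <assert.h>
#include <stdint.h>

static uint64_t
xor64_via_halves (uint64_t a, uint64_t b)
{
  /* Operate on each 32-bit word separately, then reassemble.  */
  uint32_t lo = (uint32_t) a ^ (uint32_t) b;
  uint32_t hi = (uint32_t) (a >> 32) ^ (uint32_t) (b >> 32);
  return ((uint64_t) hi << 32) | lo;
}

int
main (void)
{
  uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
  assert (xor64_via_halves (a, b) == (a ^ b));
  return 0;
}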
@@ -4487,7 +4675,8 @@ "arm_arch_thumb2" "sbfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "extzv_t2" @@ -4498,7 +4687,8 @@ "arm_arch_thumb2" "ubfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) @@ -4510,7 +4700,8 @@ "TARGET_IDIV" "sdiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "sdiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "sdiv")] ) (define_insn "udivsi3" @@ -4520,7 +4711,8 @@ "TARGET_IDIV" "udiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "udiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "udiv")] ) @@ -4582,11 +4774,14 @@ ) (define_insn "*arm_negsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (neg:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "rsb%?\\t%0, %1, #0" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4")] ) (define_insn "*thumb1_negsi2" @@ -4904,11 +5099,14 @@ ) (define_insn "*arm_one_cmplsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (not:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "mvn%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") (set_attr "insn" "mvn")] ) @@ -5234,7 +5432,8 @@ "TARGET_INT_SIMD" "uxtah%?\\t%0, %2, %1" [(set_attr "type" "alu_shift") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "zero_extendqisi2" @@ -5327,6 +5526,7 @@ "TARGET_INT_SIMD" "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "insn" "xtab") (set_attr "type" "alu_shift")] ) @@ -5379,7 +5579,8 @@ "TARGET_32BIT" "tst%?\\t%0, #255" [(set_attr "conds" "set") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "extendhisi2" @@ -5565,6 +5766,7 @@ ldr%(sh%)\\t%0, %1" [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] ) @@ -5677,7 +5879,8 @@ "sxtab%?\\t%0, %2, %1" [(set_attr "type" "alu_shift") (set_attr "insn" "xtab") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_split @@ -6133,6 +6336,7 @@ "arm_arch_thumb2" "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4")] ) @@ -7012,26 +7216,28 @@ " ) - (define_insn "*arm_movqi_insn" - [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,Uu,r,m") - (match_operand:QI 1 "general_operand" "r,I,K,Uu,l,m,r"))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m") + (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))] "TARGET_32BIT && ( register_operand (operands[0], QImode) || register_operand (operands[1], QImode))" "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + 
mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0" - [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1") - (set_attr "insn" "mov,mov,mvn,*,*,*,*") + [(set_attr "type" "*,*,simple_alu_imm,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1") + (set_attr "insn" "mov,mov,mov,mov,mvn,*,*,*,*") (set_attr "predicable" "yes") - (set_attr "arch" "any,any,any,t2,t2,any,any") - (set_attr "length" "4,4,4,2,2,4,4")] + (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") + (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") + (set_attr "length" "2,4,4,2,4,2,2,4,4")] ) (define_insn "*thumb1_movqi_insn" @@ -8701,7 +8907,7 @@ (define_expand "movsfcc" [(set (match_operand:SF 0 "s_register_operand" "") - (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "") (match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT" @@ -8723,7 +8929,7 @@ (define_expand "movdfcc" [(set (match_operand:DF 0 "s_register_operand" "") - (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "") (match_operand:DF 2 "s_register_operand" "") (match_operand:DF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" @@ -9276,17 +9482,17 @@ [(set_attr "type" "call")] ) -(define_expand "return" - [(return)] +(define_expand "<return_str>return" + [(returns)] "(TARGET_ARM || (TARGET_THUMB2 && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL && !IS_STACKALIGN (arm_current_func_type ()))) - && USE_RETURN_INSN (FALSE)" + <return_cond_false>" " { if (TARGET_THUMB2) { - thumb2_expand_return (); + thumb2_expand_return (<return_simple_p>); DONE; } } @@ -9311,13 +9517,13 @@ (set_attr "predicable" "yes")] ) -(define_insn "*cond_return" +(define_insn "*cond_<return_str>return" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) - (return) + (returns) (pc)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "TARGET_ARM <return_cond_true>" "* { if (arm_ccfsm_state == 2) @@ -9325,20 +9531,21 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, false, false); + return output_return_instruction (operands[0], true, false, + <return_simple_p>); }" [(set_attr "conds" "use") (set_attr "length" "12") (set_attr "type" "load1")] ) -(define_insn "*cond_return_inverted" +(define_insn "*cond_<return_str>return_inverted" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) (pc) - (return)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + (returns)))] + "TARGET_ARM <return_cond_true>" "* { if (arm_ccfsm_state == 2) @@ -9346,7 +9553,8 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, true, false); + return output_return_instruction (operands[0], true, true, + <return_simple_p>); }" [(set_attr "conds" "use") (set_attr "length" "12") @@ -9908,6 +10116,16 @@ (eq:SI (match_operand:SI 1 "s_register_operand" "") (const_int 0))) (clobber (reg:CC CC_REGNUM))] + "arm_arch5 && TARGET_32BIT" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI 
(match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT && reload_completed" [(parallel [(set (reg:CC CC_REGNUM) @@ -9948,7 +10166,7 @@ (set (match_dup 0) (const_int 1)))]) (define_insn_and_split "*compare_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") (match_operator:SI 1 "arm_comparison_operator" [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_add_operand" "rI,L")])) @@ -9977,29 +10195,87 @@ ;; Attempt to improve the sequence generated by the compare_scc splitters ;; not to use conditional execution. + +;; Rd = (eq (reg1) (const_int0)) // ARMv5 +;; clz Rd, reg1 +;; lsr Rd, Rd, #5 (define_peephole2 [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "arm_rhs_operand" ""))) + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +;; Rd = (eq (reg1) (const_int0)) // !ARMv5 +;; negs Rd, reg1 +;; adc Rd, Rd, reg1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) (set (match_operand:SI 0 "register_operand" "") (const_int 0))) (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) (set (match_dup 0) (const_int 1))) - (match_scratch:SI 3 "r")] - "TARGET_32BIT" + (match_scratch:SI 2 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" [(parallel [(set (reg:CC CC_REGNUM) - (compare:CC (match_dup 1) (match_dup 2))) - (set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))]) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) + +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 +;; sub Rd, Reg1, reg2 +;; clz Rd, Rd +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (clz:SI (match_dup 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + + +;; Rd = (eq (reg1) (reg2/imm)) // ! 
ARMv5 +;; sub T1, Reg1, reg2 +;; negs Rd, T1 +;; adc Rd, Rd, T1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 3 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2))) (parallel [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (match_dup 3))) (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) - (parallel - [(set (match_dup 0) - (plus:SI (plus:SI (match_dup 0) (match_dup 3)) - (geu:SI (reg:CC CC_REGNUM) (const_int 0)))) - (clobber (reg:CC CC_REGNUM))])]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) (define_insn "*cond_move" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") @@ -10400,7 +10676,7 @@ ) (define_insn_and_split "*ior_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (ior:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -10438,7 +10714,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -10456,7 +10732,7 @@ (set_attr "length" "16")]) (define_insn_and_split "*and_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -10496,7 +10772,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -10518,7 +10794,7 @@ ;; need only zero the value if false (if true, then the value is already ;; correct). 
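The new peepholes above spell out two branch-free forms of Rd = (reg == 0): on ARMv5 and later, clz Rd, reg; lsr Rd, Rd, #5 (CLZ of zero is 32, and only 32 survives a right shift by 5), and without CLZ, negs Rd, reg; adc Rd, Rd, reg, where the carry out of 0 - reg is set exactly when reg is zero; the register/immediate comparisons reduce to the same idioms after an initial subtraction. A standalone C check of both identities (clz32 models the CLZ instruction, which unlike __builtin_clz is defined for a zero input):

#include <assert.h>
#include <stdint.h>

static unsigned
clz32 (uint32_t x)
{
  return x ? (unsigned) __builtin_clz (x) : 32;
}

static uint32_t
eq0_clz (uint32_t x)              /* clz rd, x ; lsr rd, rd, #5 */
{
  return clz32 (x) >> 5;
}

static uint32_t
eq0_negs_adc (uint32_t x)         /* negs rd, x ; adc rd, rd, x */
{
  uint32_t rd = 0u - x;           /* NEGS result */
  uint32_t carry = (x == 0);      /* NEGS sets C (no borrow) only for x == 0 */
  return rd + x + carry;          /* ADC: (0 - x) + x + C == C */
}

int
main (void)
{
  uint32_t tests[] = { 0, 1, 5, 0x80000000u, 0xffffffffu };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    {
      uint32_t expect = tests[i] == 0;
      assert (eq0_clz (tests[i]) == expect);
      assert (eq0_negs_adc (tests[i]) == expect);
    }
  return 0;
}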
(define_insn_and_split "*and_scc_scc_nodom" - [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r,r,0") (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) @@ -12095,6 +12371,7 @@ (const_int 0)])] "TARGET_32BIT" "" +[(set_attr "predicated" "yes")] ) (define_insn "force_register_use" @@ -12365,7 +12642,8 @@ false, true))" "ldrd%?\t%0, %3, [%1, %2]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -12379,7 +12657,8 @@ operands[1], 0, false, true))" "ldrd%?\t%0, %2, [%1]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base_neg" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -12393,7 +12672,8 @@ operands[1], -4, false, true))" "ldrd%?\t%0, %2, [%1, #-4]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -12410,7 +12690,8 @@ false, false))" "strd%?\t%2, %4, [%0, %1]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base" [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk")) @@ -12424,7 +12705,8 @@ operands[0], 0, false, false))" "strd%?\t%1, %2, [%0]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base_neg" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -12438,7 +12720,8 @@ operands[0], -4, false, false))" "strd%?\t%1, %2, [%0, #-4]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Load the load/store double peephole optimizations. diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index afb42421c06..b9ae2b09682 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -239,6 +239,10 @@ mword-relocations Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) Only generate absolute relocations on word sized values. +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + mfix-cortex-m3-ldrd Target Report Var(fix_cm3_ldrd) Init(2) Avoid overlapping destination and address registers on LDRD instructions diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index ab65978aebc..94e8c35f839 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -96,7 +96,7 @@ ;; until after the memory stage. (define_insn_reservation "1020mult1" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "1020a_e,1020a_m,1020a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +104,7 @@ ;; the execute stage. 
(define_insn_reservation "1020mult2" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "1020a_e*2,1020a_m,1020a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +112,7 @@ ;; the memory stage. (define_insn_reservation "1020mult3" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "1020a_e*2,1020a_m,1020a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +120,7 @@ ;; available after three iterations. (define_insn_reservation "1020mult4" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "1020a_e*4,1020a_m,1020a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +135,7 @@ ;; available after the memory cycle. (define_insn_reservation "1020mult5" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "1020a_e*3,1020a_m,1020a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +143,7 @@ ;; The value result is available after four iterations. (define_insn_reservation "1020mult6" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "1020a_e*5,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md index 3fa4bd0c378..67b985ce68e 100644 --- a/gcc/config/arm/arm1026ejs.md +++ b/gcc/config/arm/arm1026ejs.md @@ -96,7 +96,7 @@ ;; until after the memory stage. (define_insn_reservation "mult1" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "a_e,a_m,a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +104,7 @@ ;; the execute stage. (define_insn_reservation "mult2" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "a_e*2,a_m,a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +112,7 @@ ;; the memory stage. (define_insn_reservation "mult3" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "a_e*2,a_m,a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +120,7 @@ ;; available after three iterations. (define_insn_reservation "mult4" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "a_e*4,a_m,a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +135,7 @@ ;; available after the memory cycle. (define_insn_reservation "mult5" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "a_e*3,a_m,a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +143,7 @@ ;; The value result is available after four iterations. 
(define_insn_reservation "mult6" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "a_e*5,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md index b5802e03919..3030182acca 100644 --- a/gcc/config/arm/arm1136jfs.md +++ b/gcc/config/arm/arm1136jfs.md @@ -129,13 +129,13 @@ ;; Multiply and multiply-accumulate results are available after four stages. (define_insn_reservation "11_mult1" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "e_1*2,e_2,e_3,e_wb") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult2" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult1,11_mult2" @@ -160,13 +160,13 @@ ;; the two multiply-accumulate instructions. (define_insn_reservation "11_mult3" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smull,umull,smlal,umlal")) + (eq_attr "type" "smull,umull,smlal,umlal")) "e_1*3,e_2,e_3,e_wb*2") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult4" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulls,umulls,smlals,umlals")) + (eq_attr "type" "smulls,umulls,smlals,umlals")) "e_1*3,e_2,e_3,e_wb*2") (define_bypass 4 "11_mult3,11_mult4" @@ -190,7 +190,8 @@ ;; cycles. (define_insn_reservation "11_mult5" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx")) + (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\ + smusd,smusdx,smlsd,smlsdx")) "e_1,e_2,e_3,e_wb") (define_bypass 2 "11_mult5" @@ -211,14 +212,14 @@ ;; The same idea, then the 32-bit result is added to a 64-bit quantity. (define_insn_reservation "11_mult6" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e_1*2,e_2,e_3,e_wb*2") ;; Signed 32x32 multiply, then the most significant 32 bits are extracted ;; and are available after the memory stage. 
(define_insn_reservation "11_mult7" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smmul,smmulr")) + (eq_attr "type" "smmul,smmulr")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult6,11_mult7" diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md index 1fc82d3db7f..4db404e766f 100644 --- a/gcc/config/arm/arm926ejs.md +++ b/gcc/config/arm/arm926ejs.md @@ -81,32 +81,32 @@ (define_insn_reservation "9_mult1" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "e*2,m,w") (define_insn_reservation "9_mult2" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e*3,m,w") (define_insn_reservation "9_mult3" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "e*3,m,w") (define_insn_reservation "9_mult4" 5 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "e*4,m,w") (define_insn_reservation "9_mult5" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smulxy,smlaxy,smlawx")) + (eq_attr "type" "smulxy,smlaxy,smlawx")) "e,m,w") (define_insn_reservation "9_mult6" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 7e7b3e69e0a..251d4975b7c 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -260,6 +260,18 @@ (and (match_code "const_int") (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + (define_constraint "Di" "@internal In ARM/Thumb-2 state a const_int or const_double where both the high @@ -317,6 +329,9 @@ (and (match_code "const_double") (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) +(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") + (define_memory_constraint "Ua" "@internal An address valid for loading/storing register exclusive" @@ -346,21 +361,21 @@ In ARM/Thumb-2 state a valid address for Neon doubleword vector load/store instructions." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0, true)"))) (define_memory_constraint "Um" "@internal In ARM/Thumb-2 state a valid address for Neon element and structure load/store instructions." (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) (define_memory_constraint "Us" "@internal In ARM/Thumb-2 state a valid address for non-offset loads/stores of quad-word values in four ARM registers." 
(and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)"))) (define_memory_constraint "Uq" "@internal diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index f0c1985fab5..981d055c668 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -87,28 +87,26 @@ ;; 32-bit multiplies (define_insn_reservation "cortex_a15_mult32" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "no")))) + (and (eq_attr "mul32" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx") ;; 64-bit multiplies (define_insn_reservation "cortex_a15_mult64" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "yes")))) + (and (eq_attr "mul64" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx*2") ;; Integer divide (define_insn_reservation "cortex_a15_udiv" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "ca15_issue1,ca15_mx") (define_insn_reservation "cortex_a15_sdiv" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "ca15_issue1,ca15_mx") ;; Block all issue pipes for a cycle diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md index 41a2c37e8fa..963d5babd7b 100644 --- a/gcc/config/arm/cortex-a5.md +++ b/gcc/config/arm/cortex-a5.md @@ -80,7 +80,8 @@ (define_insn_reservation "cortex_a5_mul" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "mult")) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a5_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md index b6a291e017b..e67fe55ecd3 100644 --- a/gcc/config/arm/cortex-a53.md +++ b/gcc/config/arm/cortex-a53.md @@ -89,7 +89,8 @@ (define_insn_reservation "cortex_a53_mul" 3 (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "mult")) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a53_single_issue") ;; A multiply with a single-register result or an MLA, followed by an @@ -103,12 +104,12 @@ ;; Punt with a high enough latency for divides. (define_insn_reservation "cortex_a53_udiv" 8 (and (eq_attr "tune" "cortexa53") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7") (define_insn_reservation "cortex_a53_sdiv" 9 (and (eq_attr "tune" "cortexa53") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8") diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md index 3750f74f2c6..960174fb90a 100644 --- a/gcc/config/arm/cortex-a7.md +++ b/gcc/config/arm/cortex-a7.md @@ -127,8 +127,9 @@ (define_insn_reservation "cortex_a7_mul" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "mult") - (eq_attr "neon_type" "none"))) + (and (eq_attr "neon_type" "none") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_a7_both") ;; Forward the result of a multiply operation to the accumulator @@ -140,7 +141,7 @@ ;; The latency depends on the operands, so we use an estimate here. 
(define_insn_reservation "cortex_a7_idiv" 5 (and (eq_attr "tune" "cortexa7") - (eq_attr "insn" "udiv,sdiv")) + (eq_attr "type" "udiv,sdiv")) "cortex_a7_both*5") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md index bd1132a18c3..8d3e98734ce 100644 --- a/gcc/config/arm/cortex-a8.md +++ b/gcc/config/arm/cortex-a8.md @@ -139,22 +139,22 @@ (define_insn_reservation "cortex_a8_mul" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mul,smulxy,smmul")) + (eq_attr "type" "mul,smulxy,smmul")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mla" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mull" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy")) + (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy")) "cortex_a8_multiply_3") (define_insn_reservation "cortex_a8_smulwy" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smulwy,smuad,smusd")) + (eq_attr "type" "smulwy,smuad,smusd")) "cortex_a8_multiply") ;; smlald and smlsld are multiply-accumulate instructions but do not @@ -162,7 +162,7 @@ ;; cannot go in cortex_a8_mla above. (See below for bypass details.) (define_insn_reservation "cortex_a8_smlald" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_a8_multiply_2") ;; A multiply with a single-register result or an MLA, followed by an diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md index abbaa8d4e1e..05c114dc366 100644 --- a/gcc/config/arm/cortex-a9.md +++ b/gcc/config/arm/cortex-a9.md @@ -130,29 +130,29 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") ;; We get 16*16 multiply / mac results in 3 cycles. (define_insn_reservation "cortex_a9_mult16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smulxy")) + (eq_attr "type" "smulxy")) "cortex_a9_mult16") ;; The 16*16 mac is slightly different that it ;; reserves M1 and M2 in the same cycle. (define_insn_reservation "cortex_a9_mac16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smlaxy")) + (eq_attr "type" "smlaxy")) "cortex_a9_mac16") (define_insn_reservation "cortex_a9_multiply" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mul,smmul,smmulr")) + (eq_attr "type" "mul,smmul,smmulr")) "cortex_a9_mult") (define_insn_reservation "cortex_a9_mac" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_a9_mac") (define_insn_reservation "cortex_a9_multiply_long" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) "cortex_a9_mult_long") ;; An instruction with a result in E2 can be forwarded diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md index 47b03644f73..dc3a3299572 100644 --- a/gcc/config/arm/cortex-m4.md +++ b/gcc/config/arm/cortex-m4.md @@ -31,7 +31,10 @@ ;; ALU and multiply is one cycle. 
(define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult")) + (ior (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,\ + alu_shift,alu_shift_reg") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_m4_ex") ;; Byte, half-word and word load is two cycles. diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 84e4a3a1e60..6d37079f2b3 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -128,32 +128,32 @@ (define_insn_reservation "cortex_r4_mul_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mul,smmul")) + (eq_attr "type" "mul,smmul")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mul_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + (eq_attr "type" "smulxy,smulwy,smuad,smusd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mla_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mla_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + (eq_attr "type" "smlaxy,smlawy,smlad,smlsd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_smlald" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mull" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smull,umull,umlal,umaal")) + (eq_attr "type" "smull,umull,umlal,umaal")) "cortex_r4_mul_2") ;; A multiply or an MLA with a single-register result, followed by an @@ -196,12 +196,12 @@ ;; This gives a latency of nine for udiv and ten for sdiv. (define_insn_reservation "cortex_r4_udiv" 9 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "cortex_r4_div_9") (define_insn_reservation "cortex_r4_sdiv" 10 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "cortex_r4_div_10") ;; Branches. We assume correct prediction. 
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md index e03894aa61c..efc6a1db959 100644 --- a/gcc/config/arm/fa526.md +++ b/gcc/config/arm/fa526.md @@ -76,12 +76,12 @@ (define_insn_reservation "526_mult1" 2 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy")) + (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy")) "fa526_core") (define_insn_reservation "526_mult2" 5 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ umlals,smulls,smlals,smlawx")) "fa526_core*4") diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md index d53617a78e3..dec26c5c3ac 100644 --- a/gcc/config/arm/fa606te.md +++ b/gcc/config/arm/fa606te.md @@ -71,22 +71,22 @@ (define_insn_reservation "606te_mult1" 2 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "fa606te_core") (define_insn_reservation "606te_mult2" 3 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy")) + (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy")) "fa606te_core*2") (define_insn_reservation "606te_mult3" 4 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "mul,mla,muls,mlas")) + (eq_attr "type" "mul,mla,muls,mlas")) "fa606te_core*3") (define_insn_reservation "606te_mult4" 5 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) "fa606te_core*4") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md index 690cb46d878..818ad607b47 100644 --- a/gcc/config/arm/fa626te.md +++ b/gcc/config/arm/fa626te.md @@ -82,22 +82,22 @@ (define_insn_reservation "626te_mult1" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fa626te_core") (define_insn_reservation "626te_mult2" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fa626te_core") (define_insn_reservation "626te_mult3" 3 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fa626te_core*2") (define_insn_reservation "626te_mult4" 4 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fa626te_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md index 07ab018f667..8790b035aa5 100644 --- a/gcc/config/arm/fa726te.md +++ b/gcc/config/arm/fa726te.md @@ -115,7 +115,7 @@ (define_insn_reservation "726te_mult_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ + (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy")) "fa726te_issue+fa726te_mac_pipe") diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md index 8691450c3c7..f3b7dadcba2 100644 --- a/gcc/config/arm/fmp626.md +++ b/gcc/config/arm/fmp626.md @@ -77,22 +77,22 @@ (define_insn_reservation "mp626_mult1" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fmp626_core") 
(define_insn_reservation "mp626_mult2" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fmp626_core") (define_insn_reservation "mp626_mult3" 3 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fmp626_core*2") (define_insn_reservation "mp626_mult4" 4 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fmp626_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index b3ad42b376f..d84929f3d1f 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -496,3 +496,11 @@ (define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) +;; Both kinds of return insn. +(define_code_iterator returns [return simple_return]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) +(define_code_attr return_simple_p [(return "false") (simple_return "true")]) +(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") + (simple_return " && use_simple_return_p ()")]) +(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") + (simple_return " && use_simple_return_p ()")]) diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md index 8ebdfc81761..ad137d492e4 100644 --- a/gcc/config/arm/ldmstm.md +++ b/gcc/config/arm/ldmstm.md @@ -37,7 +37,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia" [(match_parallel 0 "load_multiple_operation" @@ -74,7 +75,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -108,7 +110,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -125,7 +128,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -302,7 +306,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm4_db_update" [(match_parallel 0 "load_multiple_operation" @@ -323,7 +328,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db" [(match_parallel 0 "store_multiple_operation" @@ -338,7 +344,8 @@ 
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db_update" [(match_parallel 0 "store_multiple_operation" @@ -355,7 +362,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -477,7 +485,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia" [(match_parallel 0 "load_multiple_operation" @@ -508,7 +517,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -537,7 +547,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -552,7 +563,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -704,7 +716,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm3_db_update" [(match_parallel 0 "load_multiple_operation" @@ -722,7 +735,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db" [(match_parallel 0 "store_multiple_operation" @@ -735,7 +749,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db_update" [(match_parallel 0 "store_multiple_operation" @@ -750,7 +765,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -855,7 +871,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia" [(match_parallel 0 "load_multiple_operation" @@ -880,7 +897,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + 
(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -904,7 +922,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -917,7 +936,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -1044,7 +1064,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm2_db_update" [(match_parallel 0 "load_multiple_operation" @@ -1059,7 +1080,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db" [(match_parallel 0 "store_multiple_operation" @@ -1070,7 +1092,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db_update" [(match_parallel 0 "store_multiple_operation" @@ -1083,7 +1106,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md index 39f4c584515..4004fa59409 100644 --- a/gcc/config/arm/marvell-pj4.md +++ b/gcc/config/arm/marvell-pj4.md @@ -95,10 +95,14 @@ "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") (define_insn_reservation "pj4_ir_mul" 3 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "mult")) "pj4_is,pj4_mul,nothing*2,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "pj4_is,pj4_mul,nothing*2,pj4_cp") (define_insn_reservation "pj4_ir_div" 20 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "insn" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") ;; Branches and calls. 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index f91a6f7d08b..2761adb286a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -241,8 +241,8 @@ }) (define_expand "movmisalign<mode>" - [(set (match_operand:VDQX 0 "neon_struct_or_register_operand") - (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_or_register_operand")] + [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" { @@ -255,7 +255,7 @@ }) (define_insn "*movmisalign<mode>_neon_store" - [(set (match_operand:VDX 0 "neon_struct_operand" "=Um") + [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" @@ -263,15 +263,16 @@ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" - [(set (match_operand:VDX 0 "s_register_operand" "=w") - (unspec:VDX [(match_operand:VDX 1 "neon_struct_operand" " Um")] + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" + " Um")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vld1.<V_sz_elem>\t{%P0}, %A1" [(set_attr "neon_type" "neon_vld1_1_2_regs")]) (define_insn "*movmisalign<mode>_neon_store" - [(set (match_operand:VQX 0 "neon_struct_operand" "=Um") + [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" @@ -279,8 +280,9 @@ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) (define_insn "*movmisalign<mode>_neon_load" - [(set (match_operand:VQX 0 "s_register_operand" "=w") - (unspec:VQX [(match_operand:VQX 1 "neon_struct_operand" " Um")] + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" + " Um")] UNSPEC_MISALIGNED_ACCESS))] "TARGET_NEON && !BYTES_BIG_ENDIAN" "vld1.<V_sz_elem>\t{%q0}, %A1" @@ -679,29 +681,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))] - "TARGET_NEON" -{ - switch (which_alternative) - { - case 0: /* fall through */ - case 4: return "vorr\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 5: return neon_output_logic_immediate ("vorr", &operands[2], - DImode, 0, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; - default: gcc_unreachable (); - } -} - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] -) - ;; The concrete forms of the Neon immediate-logic instructions are vbic and ;; vorr. 
We support the pseudo-instruction vand instead, because that ;; corresponds to the canonical form the middle-end expects to use for @@ -805,21 +784,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w") - (match_operand:DI 2 "s_register_operand" "w,r,r,w")))] - "TARGET_NEON" - "@ - veor\t%P0, %P1, %P2 - # - # - veor\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "length" "*,8,8,*") - (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] -) - (define_insn "one_cmpl<mode>2" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] @@ -5617,7 +5581,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2])); DONE; }) @@ -5628,7 +5592,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2])); + emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 92de9fe8bd9..f4a4515fa39 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -42,6 +42,17 @@ (ior (match_operand 0 "imm_for_neon_inv_logic_operand") (match_operand 0 "s_register_operand"))) +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + ;; Any hard register. (define_predicate "arm_hard_register_operand" (match_code "reg") @@ -162,6 +173,17 @@ (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) (match_operand 0 "neon_inv_logic_op2"))) +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + (define_predicate "arm_adddi_operand" (ior (match_operand 0 "s_register_operand") (and (match_code "const_int") @@ -299,6 +321,12 @@ || maybe_get_arm_condition_code (op) == ARM_NE || maybe_get_arm_condition_code (op) == ARM_VC"))) +(define_special_predicate "arm_cond_move_operator" + (if_then_else (match_test "arm_restrict_it") + (and (match_test "TARGET_FPU_ARMV8") + (match_operand 0 "arm_vsel_comparison_operator")) + (match_operand 0 "expandable_comparison_operator"))) + (define_special_predicate "noov_comparison_operator" (match_code "lt,ge,eq,ne")) @@ -535,17 +563,6 @@ (ior (match_operand 0 "s_register_operand") (match_operand 0 "imm_for_neon_rshift_operand"))) -(define_predicate "imm_for_neon_logic_operand" - (match_code "const_vector") -{ - return (TARGET_NEON - && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); -}) - -(define_predicate "neon_logic_op2" - (ior (match_operand 0 "imm_for_neon_logic_operand") - (match_operand 0 "s_register_operand"))) - ;; Predicates for named expanders that overlap multiple ISAs. 
(define_predicate "cmpdi_operand" @@ -623,10 +640,14 @@ (define_predicate "neon_struct_operand" (and (match_code "mem") - (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_predicate "neon_permissive_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)"))) -(define_predicate "neon_struct_or_register_operand" - (ior (match_operand 0 "neon_struct_operand") +(define_predicate "neon_perm_struct_or_reg_operand" + (ior (match_operand 0 "neon_permissive_struct_operand") (match_operand 0 "s_register_operand"))) (define_special_predicate "add_operator" diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md index 980234836c9..8f7bd71c317 100644 --- a/gcc/config/arm/sync.md +++ b/gcc/config/arm/sync.md @@ -124,7 +124,8 @@ UNSPEC_LL))] "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "atomic_compare_and_swap<mode>" [(match_operand:SI 0 "s_register_operand" "") ;; bool out @@ -361,7 +362,8 @@ VUNSPEC_LL)))] "TARGET_HAVE_LDREXBH" "ldrex<sync_sfx>%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -371,7 +373,8 @@ VUNSPEC_LAX)))] "TARGET_HAVE_LDACQ" "ldaex<sync_sfx>%?\\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -380,7 +383,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREX" "ldrex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -389,7 +393,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ" "ldaex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -398,7 +403,8 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREXD" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_load_acquire_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -407,7 +413,8 @@ VUNSPEC_LAX))] "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" "ldaexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -431,7 +438,8 @@ } return "strex<sync_sfx>%?\t%0, %2, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusivedi" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -448,7 +456,8 @@ operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); return "stlexd%?\t%0, %2, %3, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "arm_store_release_exclusive<mode>" [(set (match_operand:SI 0 "s_register_operand" "=&r") @@ -459,4 +468,5 @@ VUNSPEC_SLX))] 
"TARGET_HAVE_LDACQ" "stlex<sync_sfx>%?\t%0, %2, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index fe075e5862a..246f0f5b540 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -78,8 +78,8 @@ $(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ $(srcdir)/config/arm/arm-tables.opt -arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ - $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ +arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ insn-config.h conditions.h output.h dumpfile.h \ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index ca4eedb037b..cd5837480b8 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -267,8 +267,8 @@ ;; regs. The high register alternatives are not taken into account when ;; choosing register preferences in order to reflect their expense. (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m") - (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] "TARGET_THUMB2 && ! TARGET_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_VFP) && ( register_operand (operands[0], SImode) @@ -276,16 +276,19 @@ "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 movw%?\\t%0, %1 ldr%?\\t%0, %1 ldr%?\\t%0, %1 str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,*,simple_alu_imm,*,load1,load1,store1,store1") + [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,simple_alu_imm,*,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") (set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*") - (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] ) (define_insn "tls_load_dot_plus_four" @@ -390,26 +393,32 @@ ) (define_insn_and_split "*thumb2_movsicc_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,l") (if_then_else:SI (match_operator 3 "arm_comparison_operator" [(match_operand 4 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") - (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + (match_operand:SI 1 "arm_not_operand" "0 ,Py,0 ,0,rI,K,rI,rI,K ,K,r,lPy") + (match_operand:SI 2 "arm_not_operand" "Py,0 ,rI,K,0 ,0,rI,K ,rI,K,r,lPy")))] "TARGET_THUMB2" "@ it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%D3\;mov%D3\\t%0, %2 it\\t%D3\;mvn%D3\\t%0, #%B2 it\\t%d3\;mov%d3\\t%0, %1 it\\t%d3\;mvn%d3\\t%0, #%B1 # # # + # + # #" - ; alt 4: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - ; alt 5: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 - ; alt 6: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 - ; alt 7: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 
+ ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 + ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2" "&& reload_completed" [(const_int 0)] { @@ -440,7 +449,8 @@ operands[2]))); DONE; } - [(set_attr "length" "6,6,6,6,10,10,10,10") + [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6,6") + (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes,yes") (set_attr "conds" "use")] ) @@ -491,29 +501,30 @@ (define_insn_and_split "*thumb2_and_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)]) (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_THUMB2" - "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1" + "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0" "&& reload_completed" - [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0))) - (cond_exec (match_dup 4) (set (match_dup 0) - (and:SI (match_dup 3) (const_int 1))))] + [(set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))] { enum machine_mode mode = GET_MODE (operands[2]); enum rtx_code rc = GET_CODE (operands[1]); - operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); if (mode == CCFPmode || mode == CCFPEmode) rc = reverse_condition_maybe_unordered (rc); else rc = reverse_condition (rc); - operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set (attr "length") (if_then_else (match_test "arm_restrict_it") + (const_int 8) + (const_int 10)))] ) (define_insn_and_split "*thumb2_ior_scc" @@ -649,7 +660,7 @@ ) (define_insn_and_split "*thumb2_negscc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (neg:SI (match_operator 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_rhs_operand" "rI")]))) @@ -671,7 +682,7 @@ GEN_INT (31)))); DONE; } - else if (GET_CODE (operands[3]) == NE) + else if (GET_CODE (operands[3]) == NE && !arm_restrict_it) { /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */ if (CONST_INT_P (operands[2])) @@ -691,7 +702,7 @@ } else { - /* Emit: cmp\\t%1, %2\;ite\\t%D3\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */ + /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/ enum rtx_code rc = reverse_condition (GET_CODE (operands[3])); enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]); rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM); @@ -699,21 +710,15 @@ emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx), gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); - rc = GET_CODE (operands[3]); - emit_insn (gen_rtx_COND_EXEC (VOIDmode, - gen_rtx_fmt_ee (rc, - VOIDmode, - tmp1, - const0_rtx), - gen_rtx_SET (VOIDmode, - operands[0], - GEN_INT (~0)))); DONE; } FAIL; @@ -1063,7 +1068,7 @@ "mul%!\\t%0, %2, %0" [(set_attr "predicable" "yes") (set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0" [(set 
(reg:CC_NOOV CC_REGNUM) @@ -1076,7 +1081,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -1088,7 +1093,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_cbz" [(set (pc) (if_then_else diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 1930cddb835..9ac887e9b19 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -18,31 +18,6 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. */ -;; The VFP "type" attributes differ from those used in the FPA model. -;; fcpys Single precision cpy. -;; ffariths Single precision abs, neg. -;; ffarithd Double precision abs, neg, cpy. -;; fadds Single precision add/sub. -;; faddd Double precision add/sub. -;; fconsts Single precision load immediate. -;; fconstd Double precision load immediate. -;; fcmps Single precision comparison. -;; fcmpd Double precision comparison. -;; fmuls Single precision multiply. -;; fmuld Double precision multiply. -;; fmacs Single precision multiply-accumulate. -;; fmacd Double precision multiply-accumulate. -;; ffmas Single precision fused multiply-accumulate. -;; ffmad Double precision fused multiply-accumulate. -;; fdivs Single precision sqrt or division. -;; fdivd Double precision sqrt or division. -;; f_flag fmstat operation -;; f_load[sd] Floating point load from memory. -;; f_store[sd] Floating point store to memory. -;; f_2_r Transfer vfp to arm reg. -;; r_2_f Transfer arm to vfp reg. -;; f_cvt Convert floating<->integral - ;; SImode moves ;; ??? For now do not allow loading constants into vfp regs. This causes ;; problems because small constants get converted into adds. @@ -87,45 +62,52 @@ ;; See thumb2.md:thumb2_movsi_insn for an explanation of the split ;; high/low register alternatives for loads and stores here. +;; The l/Py alternative should come after r/I to ensure that the short variant +;; is chosen with length 2 when the instruction is predicated for +;; arm_restrict_it. 
(define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" "* switch (which_alternative) { - case 0: case 1: - return \"mov%?\\t%0, %1\"; + case 0: + case 1: case 2: - return \"mvn%?\\t%0, #%B1\"; + return \"mov%?\\t%0, %1\"; case 3: - return \"movw%?\\t%0, %1\"; + return \"mvn%?\\t%0, #%B1\"; case 4: + return \"movw%?\\t%0, %1\"; case 5: - return \"ldr%?\\t%0, %1\"; case 6: + return \"ldr%?\\t%0, %1\"; case 7: - return \"str%?\\t%1, %0\"; case 8: - return \"fmsr%?\\t%0, %1\\t%@ int\"; + return \"str%?\\t%1, %0\"; case 9: - return \"fmrs%?\\t%0, %1\\t%@ int\"; + return \"fmsr%?\\t%0, %1\\t%@ int\"; case 10: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 11: return \"fcpys%?\\t%0, %1\\t%@ int\"; - case 11: case 12: + case 12: case 13: return output_move_vfp (operands); default: gcc_unreachable (); } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") - (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") - (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "type" "*,*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "insn" "mov,mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -412,6 +394,7 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") @@ -420,7 +403,6 @@ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) - ;; DFmode moves (define_insn "*movdf_vfp" @@ -550,7 +532,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" "@ it\\t%D3\;fcpys%D3\\t%0, %2 it\\t%d3\;fcpys%d3\\t%0, %1 @@ -598,7 +580,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" "@ it\\t%D3\;fcpyd%D3\\t%P0, %P2 it\\t%d3\;fcpyd%d3\\t%P0, %P1 @@ -624,6 +606,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabss%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -633,6 +616,7 @@ "TARGET_32BIT && 
TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffarithd")] ) @@ -644,6 +628,7 @@ fnegs%?\\t%0, %1 eor%?\\t%0, %1, #-2147483648" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -689,6 +674,7 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4,4,8") (set_attr "type" "ffarithd")] ) @@ -703,6 +689,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -713,6 +700,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -724,6 +712,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -734,6 +723,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -747,6 +737,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fdivs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -757,6 +748,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fdivd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -770,6 +762,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -780,6 +773,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuld")] ) @@ -790,6 +784,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -800,6 +795,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuld")] ) @@ -815,6 +811,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -826,6 +823,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -838,6 +836,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -849,6 +848,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -861,6 +861,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -872,6 +873,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmacd%?\\t%P0, 
%P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -886,6 +888,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -898,6 +901,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -911,6 +915,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -923,6 +928,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -934,6 +940,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -946,6 +953,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma<vfp_type>")] ) @@ -958,6 +966,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtds%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -967,6 +976,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtsd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -976,6 +986,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f32.f16\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -985,6 +996,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f16.f32\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -994,6 +1006,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftosizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1003,6 +1016,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1013,6 +1027,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftouizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1022,6 +1037,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1032,6 +1048,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1041,6 +1058,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1051,6 +1069,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fuitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr 
"predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1060,6 +1079,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1072,6 +1092,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsqrts%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -1081,6 +1102,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -1168,6 +1190,7 @@ fcmps%?\\t%0, %1 fcmpzs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1180,6 +1203,7 @@ fcmpes%?\\t%0, %1 fcmpezs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1192,6 +1216,7 @@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1204,6 +1229,7 @@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1263,6 +1289,7 @@ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1" [(set_attr "predicable" "<vrint_predicable>") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_rint<vfp_type>")] ) diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h index ce331cbe363..c30a9718e76 100644 --- a/gcc/config/c6x/c6x.h +++ b/gcc/config/c6x/c6x.h @@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch; Really only externally visible arrays must be aligned this way, as only those are directly visible from another compilation unit. But we don't have that information available here. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ ? BITS_PER_UNIT * 8 : (ALIGN)) diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h index 311292c7a44..297b98dd0d8 100644 --- a/gcc/config/i386/ammintrin.h +++ b/gcc/config/i386/ammintrin.h @@ -27,13 +27,15 @@ #ifndef _AMMINTRIN_H_INCLUDED #define _AMMINTRIN_H_INCLUDED -#ifndef __SSE4A__ -# error "SSE4A instruction set not enabled" -#else - /* We need definitions from the SSE3, SSE2 and SSE header files*/ #include <pmmintrin.h> +#ifndef __SSE4A__ +#pragma GCC push_options +#pragma GCC target("sse4a") +#define __DISABLE_SSE4A__ +#endif /* __SSE4A__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_stream_sd (double * __P, __m128d __Y) { @@ -83,6 +85,9 @@ _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned cons (unsigned int)(I), (unsigned int)(L))) #endif -#endif /* __SSE4A__ */ +#ifdef __DISABLE_SSE4A__ +#undef __DISABLE_SSE4A__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4A__ */ #endif /* _AMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 1537bf5add0..4030dfe2bc2 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -25,6 +25,15 @@ # error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 
#endif +#ifndef _AVX2INTRIN_H_INCLUDED +#define _AVX2INTRIN_H_INCLUDED + +#ifndef __AVX2__ +#pragma GCC push_options +#pragma GCC target("avx2") +#define __DISABLE_AVX2__ +#endif /* __AVX2__ */ + /* Sum absolute 8-bit integer difference of adjacent groups of 4 byte integers in the first 2 operands. Starting offsets within operands are determined by the 3rd mask operand. */ @@ -1871,3 +1880,10 @@ _mm256_mask_i64gather_epi32 (__m128i src, int const *base, (__v4si)(__m128i)MASK, \ (int)SCALE) #endif /* __OPTIMIZE__ */ + +#ifdef __DISABLE_AVX2__ +#undef __DISABLE_AVX2__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX2__ */ + +#endif /* _AVX2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index b75de451af9..7f2109a7299 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -28,6 +28,15 @@ # error "Never use <avxintrin.h> directly; include <immintrin.h> instead." #endif +#ifndef _AVXINTRIN_H_INCLUDED +#define _AVXINTRIN_H_INCLUDED + +#ifndef __AVX__ +#pragma GCC push_options +#pragma GCC target("avx") +#define __DISABLE_AVX__ +#endif /* __AVX__ */ + /* Internal data types for implementing the intrinsics. */ typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); @@ -1424,3 +1433,10 @@ _mm256_castsi128_si256 (__m128i __A) { return (__m256i) __builtin_ia32_si256_si ((__v4si)__A); } + +#ifdef __DISABLE_AVX__ +#undef __DISABLE_AVX__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX__ */ + +#endif /* _AVXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h index 929ea20b970..0c6cb9616c8 100644 --- a/gcc/config/i386/bmi2intrin.h +++ b/gcc/config/i386/bmi2intrin.h @@ -25,13 +25,15 @@ # error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __BMI2__ -# error "BMI2 instruction set not enabled" -#endif /* __BMI2__ */ - #ifndef _BMI2INTRIN_H_INCLUDED #define _BMI2INTRIN_H_INCLUDED +#ifndef __BMI2__ +#pragma GCC push_options +#pragma GCC target("bmi2") +#define __DISABLE_BMI2__ +#endif /* __BMI2__ */ + extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _bzhi_u32 (unsigned int __X, unsigned int __Y) @@ -99,4 +101,9 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) #endif /* !__x86_64__ */ +#ifdef __DISABLE_BMI2__ +#undef __DISABLE_BMI2__ +#pragma GCC pop_options +#endif /* __DISABLE_BMI2__ */ + #endif /* _BMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h index 0087f5c06e0..281ebaaf4f2 100644 --- a/gcc/config/i386/bmiintrin.h +++ b/gcc/config/i386/bmiintrin.h @@ -25,13 +25,15 @@ # error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." 
#endif -#ifndef __BMI__ -# error "BMI instruction set not enabled" -#endif /* __BMI__ */ - #ifndef _BMIINTRIN_H_INCLUDED #define _BMIINTRIN_H_INCLUDED +#ifndef __BMI__ +#pragma GCC push_options +#pragma GCC target("bmi") +#define __DISABLE_BMI__ +#endif /* __BMI__ */ + extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u16 (unsigned short __X) { @@ -52,6 +54,12 @@ __bextr_u32 (unsigned int __X, unsigned int __Y) } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z) +{ + return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u32 (unsigned int __X) { return __X & -__X; @@ -91,6 +99,12 @@ __bextr_u64 (unsigned long long __X, unsigned long long __Y) } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z) +{ + return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsi_u64 (unsigned long long __X) { return __X & -__X; @@ -116,4 +130,9 @@ __tzcnt_u64 (unsigned long long __X) #endif /* __x86_64__ */ +#ifdef __DISABLE_BMI__ +#undef __DISABLE_BMI__ +#pragma GCC pop_options +#endif /* __DISABLE_BMI__ */ + #endif /* _BMIINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index e28f098b03a..249c4cd1d53 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -674,8 +674,14 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Sandy Bridge. */ cpu = "corei7-avx"; else if (has_sse4_2) - /* Assume Core i7. */ - cpu = "corei7"; + { + if (has_movbe) + /* Assume SLM. */ + cpu = "slm"; + else + /* Assume Core i7. */ + cpu = "corei7"; + } else if (has_ssse3) { if (has_movbe) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index cf404a13536..c30f05657d6 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -27,13 +27,15 @@ #ifndef _EMMINTRIN_H_INCLUDED #define _EMMINTRIN_H_INCLUDED -#ifndef __SSE2__ -# error "SSE2 instruction set not enabled" -#else - /* We need definitions from the SSE header files*/ #include <xmmintrin.h> +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + /* SSE2 */ typedef double __v2df __attribute__ ((__vector_size__ (16))); typedef long long __v2di __attribute__ ((__vector_size__ (16))); @@ -1515,6 +1517,9 @@ _mm_castsi128_pd(__m128i __A) return (__m128d) __A; } -#endif /* __SSE2__ */ +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ #endif /* _EMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/f16cintrin.h b/gcc/config/i386/f16cintrin.h index 88903c16231..76f35fa1eac 100644 --- a/gcc/config/i386/f16cintrin.h +++ b/gcc/config/i386/f16cintrin.h @@ -25,13 +25,15 @@ # error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead." 
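A side note on the recurring header change above (an editorial illustration, not part of this commit): the x86 intrinsic headers touched here drop their old "#error ... instruction set not enabled" guards and instead enable the required ISA temporarily with "#pragma GCC push_options" / "#pragma GCC target(...)", restoring the caller's options with "#pragma GCC pop_options". The intrinsics are therefore always declared, so a file compiled without, say, -mbmi can still use them inside a function that opts in through the target attribute. A minimal sketch, assuming a GCC of this vintage:

    #include <x86intrin.h>

    /* __blsi_u32 is declared in bmiintrin.h; it is visible here because the
       header no longer #errors out, and the target attribute enables BMI for
       this one function only.  */
    __attribute__ ((target ("bmi")))
    unsigned int
    lowest_set_bit (unsigned int x)
    {
      return __blsi_u32 (x);   /* isolates the lowest set bit: x & -x */
    }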
#endif -#ifndef __F16C__ -# error "F16C instruction set not enabled" -#else - #ifndef _F16CINTRIN_H_INCLUDED #define _F16CINTRIN_H_INCLUDED +#ifndef __F16C__ +#pragma GCC push_options +#pragma GCC target("f16c") +#define __DISABLE_F16C__ +#endif /* __F16C__ */ + extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _cvtsh_ss (unsigned short __S) { @@ -88,5 +90,9 @@ _mm256_cvtps_ph (__m256 __A, const int __I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I))) #endif /* __OPTIMIZE */ +#ifdef __DISABLE_F16C__ +#undef __DISABLE_F16C__ +#pragma GCC pop_options +#endif /* __DISABLE_F16C__ */ + #endif /* _F16CINTRIN_H_INCLUDED */ -#endif /* __F16C__ */ diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h index 00ba7813123..e615f3e7ba0 100644 --- a/gcc/config/i386/fma4intrin.h +++ b/gcc/config/i386/fma4intrin.h @@ -28,13 +28,15 @@ #ifndef _FMA4INTRIN_H_INCLUDED #define _FMA4INTRIN_H_INCLUDED -#ifndef __FMA4__ -# error "FMA4 instruction set not enabled" -#else - /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ #include <ammintrin.h> +#ifndef __FMA4__ +#pragma GCC push_options +#pragma GCC target("fma4") +#define __DISABLE_FMA4__ +#endif /* __FMA4__ */ + /* 128b Floating point multiply/add type instructions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) @@ -231,6 +233,9 @@ _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C) return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#endif +#ifdef __DISABLE_FMA4__ +#undef __DISABLE_FMA4__ +#pragma GCC pop_options +#endif /* __DISABLE_FMA4__ */ #endif diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h index 6ede84b18d4..97de93fd146 100644 --- a/gcc/config/i386/fmaintrin.h +++ b/gcc/config/i386/fmaintrin.h @@ -29,8 +29,10 @@ #define _FMAINTRIN_H_INCLUDED #ifndef __FMA__ -# error "FMA instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("fma") +#define __DISABLE_FMA__ +#endif /* __FMA__ */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -292,6 +294,9 @@ _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) -(__v8sf)__C); } -#endif +#ifdef __DISABLE_FMA__ +#undef __DISABLE_FMA__ +#pragma GCC pop_options +#endif /* __DISABLE_FMA__ */ #endif diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h index 9b63222c835..41d4085b010 100644 --- a/gcc/config/i386/fxsrintrin.h +++ b/gcc/config/i386/fxsrintrin.h @@ -28,6 +28,12 @@ #ifndef _FXSRINTRIN_H_INCLUDED #define _FXSRINTRIN_H_INCLUDED +#ifndef __FXSR__ +#pragma GCC push_options +#pragma GCC target("fxsr") +#define __DISABLE_FXSR__ +#endif /* __FXSR__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _fxsave (void *__P) @@ -58,4 +64,10 @@ _fxrstor64 (void *__P) } #endif +#ifdef __DISABLE_FXSR__ +#undef __DISABLE_FXSR__ +#pragma GCC pop_options +#endif /* __DISABLE_FXSR__ */ + + #endif /* _FXSRINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h index 35063e68b1e..4a91c843685 100644 --- a/gcc/config/i386/gnu.h +++ b/gcc/config/i386/gnu.h @@ -36,6 +36,12 @@ along with GCC. If not, see <http://www.gnu.org/licenses/>. #endif #ifdef TARGET_LIBC_PROVIDES_SSP + +/* Not supported yet. */ +# undef TARGET_THREAD_SSP_OFFSET + /* Not supported yet. 
*/ -#undef TARGET_THREAD_SSP_OFFSET +# undef TARGET_CAN_SPLIT_STACK +# undef TARGET_THREAD_SPLIT_STACK_OFFSET + #endif diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 51fec844bdf..31dd28a94cb 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -149,6 +149,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__atom"); def_or_undef (parse_in, "__atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__slm"); + def_or_undef (parse_in, "__slm__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: break; @@ -241,6 +245,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ATOM: def_or_undef (parse_in, "__tune_atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__tune_slm__"); + break; case PROCESSOR_GENERIC32: case PROCESSOR_GENERIC64: break; @@ -369,20 +376,23 @@ ix86_pragma_target_parse (tree args, tree pop_target) if (! args) { - cur_tree = ((pop_target) - ? pop_target - : target_option_default_node); + cur_tree = (pop_target ? pop_target : target_option_default_node); cl_target_option_restore (&global_options, TREE_TARGET_OPTION (cur_tree)); } else { cur_tree = ix86_valid_target_attribute_tree (args); - if (!cur_tree) - return false; + if (!cur_tree || cur_tree == error_mark_node) + { + cl_target_option_restore (&global_options, + TREE_TARGET_OPTION (prev_tree)); + return false; + } } target_option_current_node = cur_tree; + ix86_reset_previous_fndecl (); /* Figure out the previous/current isa, arch, tune and the differences. */ prev_opt = TREE_TARGET_OPTION (prev_tree); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ef4dc761d5a..09667893910 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -40,6 +40,8 @@ extern void ix86_output_addr_diff_elt (FILE *, int, int); extern enum calling_abi ix86_cfun_abi (void); extern enum calling_abi ix86_function_type_abi (const_tree); +extern void ix86_reset_previous_fndecl (void); + #ifdef RTX_CODE extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); @@ -207,7 +209,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* RTX_CODE */ #ifdef TREE_CODE -extern int ix86_data_alignment (tree, int); +extern int ix86_data_alignment (tree, int, bool); extern unsigned int ix86_local_alignment (tree, enum machine_mode, unsigned int); extern unsigned int ix86_minimum_alignment (tree, enum machine_mode, diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3470fef77b2..2a65fc2a6de 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1482,6 +1482,79 @@ struct processor_costs atom_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static const +struct processor_costs slm_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. 
*/ static const struct processor_costs generic64_cost = { @@ -1735,6 +1808,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_HASWELL (1<<PROCESSOR_HASWELL) #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL) #define m_ATOM (1<<PROCESSOR_ATOM) +#define m_SLM (1<<PROCESSOR_SLM) #define m_GEODE (1<<PROCESSOR_GEODE) #define m_K6 (1<<PROCESSOR_K6) @@ -1778,7 +1852,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_486 | m_PENT, /* X86_TUNE_UNROLL_STRLEN */ - m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, + m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based on simulation result. But after P4 was made, no performance benefit @@ -1790,11 +1864,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ~m_386, /* X86_TUNE_USE_SAHF */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC, /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid partial dependencies. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial register stalls on Generic32 compilation setting as well. However @@ -1817,13 +1891,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_386 | m_486 | m_K6_GEODE, /* X86_TUNE_USE_SIMODE_FIOP */ - ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC), + ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC), /* X86_TUNE_USE_MOV0 */ m_K6, /* X86_TUNE_USE_CLTD */ - ~(m_PENT | m_ATOM | m_K6), + ~(m_PENT | m_ATOM | m_SLM | m_K6), /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ m_PENT4, @@ -1838,7 +1912,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ~(m_PENT | m_PPRO), /* X86_TUNE_PROMOTE_QIMODE */ - m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_FAST_PREFIX */ ~(m_386 | m_486 | m_PENT), @@ -1879,10 +1953,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred for DFmode copies */ - ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC), + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC), /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a conflict here in between PPro/Pentium4 based chips that thread 128bit @@ -1893,13 +1967,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { shows that disabling this option on P4 brings over 20% SPECfp regression, while enabling it on K8 brings roughly 2.4% regression that can be partly masked by careful scheduling of moves. 
*/ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC, /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */ - m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER, + m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM, /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */ - m_COREI7 | m_BDVER, + m_COREI7 | m_BDVER | m_SLM, /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */ m_BDVER , @@ -1917,7 +1991,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_PPRO | m_P4_NOCONA, /* X86_TUNE_MEMORY_MISMATCH_STALL */ - m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_PROLOGUE_USING_MOVE */ m_PPRO | m_ATHLON_K8, @@ -1942,16 +2016,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more than 4 branch instructions in the 16 byte window. */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_SCHEDULE */ - m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, + m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_USE_BT */ - m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC, + m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC, /* X86_TUNE_USE_INCDEC */ - ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GENERIC), + ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC), /* X86_TUNE_PAD_RETURNS */ m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC, @@ -1960,7 +2034,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_ATOM, /* X86_TUNE_EXT_80387_CONSTANTS */ - m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, /* X86_TUNE_AVOID_VECTOR_DECODE */ m_CORE_ALL | m_K8 | m_GENERIC64, @@ -2005,13 +2079,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ - m_ATOM, + m_ATOM | m_SLM, /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector instructions. */ ~m_ATOM, - /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching + /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching at -O3. For the moment, the prefetching seems badly tuned for Intel chips. */ m_K6_GEODE | m_AMD_MULTIPLE, @@ -2026,7 +2100,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations during reassociation of fp computation. */ - m_ATOM | m_HASWELL, + m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2, /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE regs instead of memory. */ @@ -2034,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for a conditional move. */ - m_ATOM + m_ATOM, + + /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for + fp converts to destination register. */ + m_SLM + }; /* Feature tests against the various architecture variations. 
*/ @@ -2060,10 +2139,10 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { }; static const unsigned int x86_accumulate_outgoing_args - = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC; + = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_arch_always_fancy_math_387 - = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC; + = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_avx256_split_unaligned_load = m_COREI7 | m_GENERIC; @@ -2458,7 +2537,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&bdver3_cost, 16, 10, 16, 7, 11}, {&btver1_cost, 16, 10, 16, 7, 11}, {&btver2_cost, 16, 10, 16, 7, 11}, - {&atom_cost, 16, 15, 16, 7, 16} + {&atom_cost, 16, 15, 16, 7, 16}, + {&slm_cost, 16, 15, 16, 7, 16} }; static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = @@ -2479,6 +2559,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "corei7", "core-avx2", "atom", + "slm", "geode", "k6", "k6-2", @@ -2940,6 +3021,10 @@ ix86_option_override_internal (bool main_args_p) {"atom", PROCESSOR_ATOM, CPU_ATOM, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR}, + {"slm", PROCESSOR_SLM, CPU_SLM, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE + | PTA_FXSR}, {"geode", PROCESSOR_GEODE, CPU_GEODE, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, @@ -4564,6 +4649,13 @@ ix86_can_inline_p (tree caller, tree callee) /* Remember the last target of ix86_set_current_function. */ static GTY(()) tree ix86_previous_fndecl; +/* Invalidate ix86_previous_fndecl cache. */ +void +ix86_reset_previous_fndecl (void) +{ + ix86_previous_fndecl = NULL_TREE; +} + /* Establish appropriate back-end context for processing the function FNDECL. The argument might be NULL to indicate processing at top level, outside of any function scope. */ @@ -6413,7 +6505,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, /* Likewise, error if the ABI requires us to return values in the x87 registers and the user specified -mno-80387. */ - if (!TARGET_80387 && in_return) + if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) for (i = 0; i < n; i++) if (regclass[i] == X86_64_X87_CLASS || regclass[i] == X86_64_X87UP_CLASS @@ -17312,10 +17404,24 @@ distance_agu_use (unsigned int regno0, rtx insn) static bool ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, - unsigned int regno2, int split_cost) + unsigned int regno2, int split_cost, bool has_scale) { int dist_define, dist_use; + /* For Silvermont if using a 2-source or 3-source LEA for + non-destructive destination purposes, or due to wanting + ability to use SCALE, the use of LEA is justified. 
*/ + if (ix86_tune == PROCESSOR_SLM) + { + if (has_scale) + return true; + if (split_cost < 1) + return false; + if (regno0 == regno1 || regno0 == regno2) + return false; + return true; + } + dist_define = distance_non_agu_define (regno1, regno2, insn); dist_use = distance_agu_use (regno0, insn); @@ -17404,7 +17510,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[]) if (regno0 == regno1 || regno0 == regno2) return false; else - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); } /* Return true if we should emit lea instruction instead of mov @@ -17426,7 +17532,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[]) regno0 = true_regnum (operands[0]); regno1 = true_regnum (operands[1]); - return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0); + return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); } /* Return true if we need to split lea into a sequence of @@ -17505,7 +17611,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) split_cost -= 1; } - return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, + parts.scale > 1); } /* Emit x86 binary operand CODE in mode MODE, where the first operand @@ -17690,7 +17797,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[]) if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; - return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0); + return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); } /* Return true if destination reg of SET_BODY is shift count of @@ -24199,6 +24306,7 @@ ix86_issue_rate (void) { case PROCESSOR_PENTIUM: case PROCESSOR_ATOM: + case PROCESSOR_SLM: case PROCESSOR_K6: case PROCESSOR_BTVER2: return 2; @@ -24287,6 +24395,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn) return false; } +/* Helper function for exact_store_load_dependency. + Return true if addr is found in insn. */ +static bool +exact_dependency_1 (rtx addr, rtx insn) +{ + enum rtx_code code; + const char *format_ptr; + int i, j; + + code = GET_CODE (insn); + switch (code) + { + case MEM: + if (rtx_equal_p (addr, insn)) + return true; + break; + case REG: + CASE_CONST_ANY: + case SYMBOL_REF: + case CODE_LABEL: + case PC: + case CC0: + case EXPR_LIST: + return false; + default: + break; + } + + format_ptr = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); i++) + { + switch (*format_ptr++) + { + case 'e': + if (exact_dependency_1 (addr, XEXP (insn, i))) + return true; + break; + case 'E': + for (j = 0; j < XVECLEN (insn, i); j++) + if (exact_dependency_1 (addr, XVECEXP (insn, i, j))) + return true; + break; + } + } + return false; +} + +/* Return true if there exists exact dependency for store & load, i.e. + the same memory address is used in them. */ +static bool +exact_store_load_dependency (rtx store, rtx load) +{ + rtx set1, set2; + + set1 = single_set (store); + if (!set1) + return false; + if (!MEM_P (SET_DEST (set1))) + return false; + set2 = single_set (load); + if (!set2) + return false; + if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2))) + return true; + return false; +} + static int ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { @@ -24438,6 +24613,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) else cost = 0; } + break; + + case PROCESSOR_SLM: + if (!reload_completed) + return cost; + + /* Increase cost of integer loads. 
*/ + memory = get_attr_memory (dep_insn); + if (memory == MEMORY_LOAD || memory == MEMORY_BOTH) + { + enum attr_unit unit = get_attr_unit (dep_insn); + if (unit == UNIT_INTEGER && cost == 1) + { + if (memory == MEMORY_LOAD) + cost = 3; + else + { + /* Increase cost of ld/st for short int types only + because of store forwarding issue. */ + rtx set = single_set (dep_insn); + if (set && (GET_MODE (SET_DEST (set)) == QImode + || GET_MODE (SET_DEST (set)) == HImode)) + { + /* Increase cost of store/load insn if exact + dependence exists and it is load insn. */ + enum attr_memory insn_memory = get_attr_memory (insn); + if (insn_memory == MEMORY_LOAD + && exact_store_load_dependency (dep_insn, insn)) + cost = 3; + } + } + } + } default: break; @@ -24466,6 +24674,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_COREI7: case PROCESSOR_HASWELL: case PROCESSOR_ATOM: + case PROCESSOR_SLM: /* Generally, we want haifa-sched:max_issue() to look ahead as far as many instructions can be executed on a cycle, i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */ @@ -24483,110 +24692,204 @@ ia32_multipass_dfa_lookahead (void) execution. It is applied if (1) IMUL instruction is on the top of list; (2) There exists the only producer of independent IMUL instruction in - ready list; - (3) Put found producer on the top of ready list. - Returns issue rate. */ - + ready list. + Return index of IMUL producer if it was found and -1 otherwise. */ static int -ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, - int clock_var ATTRIBUTE_UNUSED) +do_reorder_for_imul (rtx *ready, int n_ready) { - static int issue_rate = -1; - int n_ready = *pn_ready; - rtx insn, insn1, insn2; - int i; + rtx insn, set, insn1, insn2; sd_iterator_def sd_it; dep_t dep; int index = -1; + int i; - /* Set up issue rate. */ - issue_rate = ix86_issue_rate(); - - /* Do reodering for Atom only. */ if (ix86_tune != PROCESSOR_ATOM) - return issue_rate; - /* Do not perform ready list reodering for pre-reload schedule pass. */ - if (!reload_completed) - return issue_rate; - /* Nothing to do if ready list contains only 1 instruction. */ - if (n_ready <= 1) - return issue_rate; + return index; /* Check that IMUL instruction is on the top of ready list. */ insn = ready[n_ready - 1]; - if (!NONDEBUG_INSN_P (insn)) - return issue_rate; - insn = PATTERN (insn); - if (GET_CODE (insn) == PARALLEL) - insn = XVECEXP (insn, 0, 0); - if (GET_CODE (insn) != SET) - return issue_rate; - if (!(GET_CODE (SET_SRC (insn)) == MULT - && GET_MODE (SET_SRC (insn)) == SImode)) - return issue_rate; + set = single_set (insn); + if (!set) + return index; + if (!(GET_CODE (SET_SRC (set)) == MULT + && GET_MODE (SET_SRC (set)) == SImode)) + return index; /* Search for producer of independent IMUL instruction. */ - for (i = n_ready - 2; i>= 0; i--) + for (i = n_ready - 2; i >= 0; i--) { insn = ready[i]; if (!NONDEBUG_INSN_P (insn)) - continue; + continue; /* Skip IMUL instruction. 
*/ insn2 = PATTERN (insn); if (GET_CODE (insn2) == PARALLEL) - insn2 = XVECEXP (insn2, 0, 0); + insn2 = XVECEXP (insn2, 0, 0); if (GET_CODE (insn2) == SET - && GET_CODE (SET_SRC (insn2)) == MULT - && GET_MODE (SET_SRC (insn2)) == SImode) - continue; + && GET_CODE (SET_SRC (insn2)) == MULT + && GET_MODE (SET_SRC (insn2)) == SImode) + continue; FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) - { - rtx con; + { + rtx con; con = DEP_CON (dep); if (!NONDEBUG_INSN_P (con)) continue; - insn1 = PATTERN (con); - if (GET_CODE (insn1) == PARALLEL) - insn1 = XVECEXP (insn1, 0, 0); - - if (GET_CODE (insn1) == SET - && GET_CODE (SET_SRC (insn1)) == MULT - && GET_MODE (SET_SRC (insn1)) == SImode) - { - sd_iterator_def sd_it1; - dep_t dep1; - /* Check if there is no other dependee for IMUL. */ - index = i; - FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) - { - rtx pro; - pro = DEP_PRO (dep1); + insn1 = PATTERN (con); + if (GET_CODE (insn1) == PARALLEL) + insn1 = XVECEXP (insn1, 0, 0); + + if (GET_CODE (insn1) == SET + && GET_CODE (SET_SRC (insn1)) == MULT + && GET_MODE (SET_SRC (insn1)) == SImode) + { + sd_iterator_def sd_it1; + dep_t dep1; + /* Check if there is no other dependee for IMUL. */ + index = i; + FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) + { + rtx pro; + pro = DEP_PRO (dep1); if (!NONDEBUG_INSN_P (pro)) continue; - if (pro != insn) - index = -1; - } - if (index >= 0) - break; - } - } + if (pro != insn) + index = -1; + } + if (index >= 0) + break; + } + } if (index >= 0) - break; + break; } - if (index < 0) - return issue_rate; /* Didn't find IMUL producer. */ + return index; +} + +/* Try to find the best candidate on the top of ready list if two insns + have the same priority - candidate is best if its dependees were + scheduled earlier. Applied for Silvermont only. + Return true if top 2 insns must be interchanged. */ +static bool +swap_top_of_ready_list (rtx *ready, int n_ready) +{ + rtx top = ready[n_ready - 1]; + rtx next = ready[n_ready - 2]; + rtx set; + sd_iterator_def sd_it; + dep_t dep; + int clock1 = -1; + int clock2 = -1; + #define INSN_TICK(INSN) (HID (INSN)->tick) - if (sched_verbose > 1) - fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n", - INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1])); + if (ix86_tune != PROCESSOR_SLM) + return false; - /* Put IMUL producer (ready[index]) at the top of ready list. */ - insn1= ready[index]; - for (i = index; i < n_ready - 1; i++) - ready[i] = ready[i + 1]; - ready[n_ready - 1] = insn1; + if (!NONDEBUG_INSN_P (top)) + return false; + if (!NONJUMP_INSN_P (top)) + return false; + if (!NONDEBUG_INSN_P (next)) + return false; + if (!NONJUMP_INSN_P (next)) + return false; + set = single_set (top); + if (!set) + return false; + set = single_set (next); + if (!set) + return false; + if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next)) + { + if (INSN_PRIORITY (top) != INSN_PRIORITY (next)) + return false; + /* Determine winner more precise. */ + FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock1) + clock1 = INSN_TICK (pro); + } + FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock2) + clock2 = INSN_TICK (pro); + } + + if (clock1 == clock2) + { + /* Determine winner - load must win. 
*/ + enum attr_memory memory1, memory2; + memory1 = get_attr_memory (top); + memory2 = get_attr_memory (next); + if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD) + return true; + } + return (bool) (clock2 < clock1); + } + return false; + #undef INSN_TICK +} + +/* Perform possible reodering of ready list for Atom/Silvermont only. + Return issue rate. */ +static int +ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, + int clock_var) +{ + int issue_rate = -1; + int n_ready = *pn_ready; + int i; + rtx insn; + int index = -1; + + /* Set up issue rate. */ + issue_rate = ix86_issue_rate (); + + /* Do reodering for Atom/SLM only. */ + if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM) + return issue_rate; + + /* Nothing to do if ready list contains only 1 instruction. */ + if (n_ready <= 1) + return issue_rate; + + /* Do reodering for post-reload scheduler only. */ + if (!reload_completed) + return issue_rate; + + if ((index = do_reorder_for_imul (ready, n_ready)) >= 0) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n", + INSN_UID (ready[index])); + + /* Put IMUL producer (ready[index]) at the top of ready list. */ + insn = ready[index]; + for (i = index; i < n_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[n_ready - 1] = insn; + return issue_rate; + } + if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready)) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n", + INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2])); + /* Swap 2 top elements of ready list. */ + insn = ready[n_ready - 1]; + ready[n_ready - 1] = ready[n_ready - 2]; + ready[n_ready - 2] = insn; + } return issue_rate; } @@ -25079,11 +25382,12 @@ ix86_constant_alignment (tree exp, int align) instead of that alignment to align the object. */ int -ix86_data_alignment (tree type, int align) +ix86_data_alignment (tree type, int align, bool opt) { int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); - if (AGGREGATE_TYPE_P (type) + if (opt + && AGGREGATE_TYPE_P (type) && TYPE_SIZE (type) && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align @@ -25095,14 +25399,17 @@ ix86_data_alignment (tree type, int align) to 16byte boundary. */ if (TARGET_64BIT) { - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) + if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) return 128; } + if (!opt) + return align; + if (TREE_CODE (type) == ARRAY_TYPE) { if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) @@ -25614,8 +25921,6 @@ enum ix86_builtins IX86_BUILTIN_CMPNEQSS, IX86_BUILTIN_CMPNLTSS, IX86_BUILTIN_CMPNLESS, - IX86_BUILTIN_CMPNGTSS, - IX86_BUILTIN_CMPNGESS, IX86_BUILTIN_CMPORDSS, IX86_BUILTIN_CMPUNORDSS, @@ -27252,8 +27557,6 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, @@ -29468,7 +29771,7 @@ ix86_get_function_versions_dispatcher (void *decl) dispatcher_version_info = insert_new_cgraph_node_version (dispatcher_node); dispatcher_version_info->next = default_version_info; - dispatcher_node->local.finalized = 1; + dispatcher_node->symbol.definition = 1; /* Set the dispatcher for all the versions. */ it_v = default_version_info; @@ -29623,7 +29926,7 @@ ix86_generate_version_dispatcher_body (void *node_p) default_ver_decl = node_version_info->next->this_node->symbol.decl; /* node is going to be an alias, so remove the finalized bit. 
*/ - node->local.finalized = false; + node->symbol.definition = false; resolver_decl = make_resolver_func (default_ver_decl, node->symbol.decl, &empty_bb); @@ -29756,6 +30059,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_INTEL_COREI7, M_AMDFAM10H, M_AMDFAM15H, + M_INTEL_SLM, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -29778,6 +30082,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"amd", M_AMD}, {"intel", M_INTEL}, {"atom", M_INTEL_ATOM}, + {"slm", M_INTEL_SLM}, {"core2", M_INTEL_CORE2}, {"corei7", M_INTEL_COREI7}, {"nehalem", M_INTEL_COREI7_NEHALEM}, @@ -29817,6 +30122,9 @@ fold_builtin_cpu (tree fndecl, tree *args) tree __cpu_model_var = make_var_decl (__processor_model_type, "__cpu_model"); + + varpool_add_new_variable (__cpu_model_var); + gcc_assert ((args != NULL) && (*args != NULL)); param_string_cst = *args; @@ -33650,6 +33958,8 @@ static inline bool inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, enum machine_mode mode, int strict) { + if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) + return false; if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) @@ -35564,6 +35874,46 @@ ix86_pad_short_function (void) } } +/* Fix up a Windows system unwinder issue. If an EH region falls thru into + the epilogue, the Windows system unwinder will apply epilogue logic and + produce incorrect offsets. This can be avoided by adding a nop between + the last insn that can throw and the first insn of the epilogue. */ + +static void +ix86_seh_fixup_eh_fallthru (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + rtx insn, next; + + /* Find the beginning of the epilogue. */ + for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) + break; + if (insn == NULL) + continue; + + /* We only care about preceeding insns that can throw. */ + insn = prev_active_insn (insn); + if (insn == NULL || !can_throw_internal (insn)) + continue; + + /* Do not separate calls from their debug information. */ + for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) + if (NOTE_P (next) + && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION + || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)) + insn = next; + else + break; + + emit_insn_after (gen_nops (const1_rtx), insn); + } +} + /* Implement machine specific optimizations. We implement padding of returns for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ static void @@ -35573,6 +35923,9 @@ ix86_reorg (void) with old MDEP_REORGS that are not CFG based. Recompute it now. 
*/ compute_bb_for_insn (); + if (TARGET_SEH && current_function_has_exception_handlers ()) + ix86_seh_fixup_eh_fallthru (); + if (optimize && optimize_function_for_speed_p (cfun)) { if (TARGET_PAD_SHORT_FUNCTION) @@ -42682,6 +43035,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_REGISTER_PRIORITY #define TARGET_REGISTER_PRIORITY ix86_register_priority +#undef TARGET_REGISTER_USAGE_LEVELING_P +#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true + #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 6055b99a55b..7d940f98804 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -257,6 +257,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1) #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) +#define TARGET_SLM (ix86_tune == PROCESSOR_SLM) /* Feature tests against the various tunings. */ enum ix86_tune_indices { @@ -332,6 +333,7 @@ enum ix86_tune_indices { X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_GENERAL_REGS_SSE_SPILL, X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, + X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, X86_TUNE_LAST }; @@ -442,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] +#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \ + ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -623,6 +627,7 @@ enum target_cpu_default TARGET_CPU_DEFAULT_corei7, TARGET_CPU_DEFAULT_haswell, TARGET_CPU_DEFAULT_atom, + TARGET_CPU_DEFAULT_slm, TARGET_CPU_DEFAULT_geode, TARGET_CPU_DEFAULT_k6, @@ -854,7 +859,18 @@ enum target_cpu_default cause character arrays to be word-aligned so that `strcpy' calls that copy constants to character arrays can be done inline. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN)) +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), true) + +/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates + some alignment increase, instead of optimization only purposes. E.g. + AMD x86-64 psABI says that variables with array type larger than 15 bytes + must be aligned to 16 byte boundaries. + + If this macro is not defined, then ALIGN is used. */ + +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + ix86_data_alignment ((TYPE), (ALIGN), false) /* If defined, a C expression to compute the alignment for a local variable. TYPE is the data type, and ALIGN is the alignment that @@ -2131,6 +2147,7 @@ enum processor_type PROCESSOR_BTVER1, PROCESSOR_BTVER2, PROCESSOR_ATOM, + PROCESSOR_SLM, PROCESSOR_max }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ce77f15f009..a6e2946584b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -323,7 +323,7 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. 
Refinements due to arguments to be @@ -964,6 +964,7 @@ (include "btver2.md") (include "geode.md") (include "atom.md") +(include "slm.md") (include "core2.md") @@ -3624,6 +3625,18 @@ CONST0_RTX (V4SFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. +(define_peephole2 + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF + (match_operand:SF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_extend:DF (match_dup 2)))] + "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") + (define_insn "*extendsfdf2_mixed" [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") (float_extend:DF @@ -3765,6 +3778,18 @@ CONST0_RTX (V2DFmode), operands[1])); }) +;; It's more profitable to split and then extend in the same register. +(define_peephole2 + [(set (match_operand:SF 0 "register_operand") + (float_truncate:SF + (match_operand:DF 1 "memory_operand")))] + "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS + && optimize_insn_for_speed_p () + && SSE_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float_truncate:SF (match_dup 2)))] + "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0) (float_truncate:SF (match_operand:DF 1))) @@ -11654,8 +11679,8 @@ (define_insn "bmi_bextr_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r,r") - (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r,r") - (match_operand:SWI48 2 "nonimmediate_operand" "r,m")] + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m") + (match_operand:SWI48 2 "register_operand" "r,r")] UNSPEC_BEXTR)) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI" @@ -11708,9 +11733,9 @@ ;; BMI2 instructions. (define_insn "bmi2_bzhi_<mode>3" [(set (match_operand:SWI48 0 "register_operand" "=r") - (and:SWI48 (match_operand:SWI48 1 "register_operand" "r") - (lshiftrt:SWI48 (const_int -1) - (match_operand:SWI48 2 "nonimmediate_operand" "rm")))) + (and:SWI48 (lshiftrt:SWI48 (const_int -1) + (match_operand:SWI48 2 "register_operand" "r")) + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI2" "bzhi\t{%2, %1, %0|%0, %1, %2}" @@ -16566,6 +16591,7 @@ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && (<MODE>mode != QImode || immediate_operand (operands[2], QImode) || q_regs_operand (operands[2], QImode)) @@ -16630,6 +16656,7 @@ || immediate_operand (operands[2], SImode) || q_regs_operand (operands[2], SImode)) && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) && ix86_match_ccmode (peep2_next_insn (3), (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index 131af0be2b7..b26dc46d256 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -49,7 +49,12 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ + /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -71,7 +76,11 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } -#endif /* SSE4.2 */ + +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ /* 32bit popcnt */ extern __inline int @@ -186,7 +195,12 @@ __bswapq (long long __X) return __builtin_bswap64 (__X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ + /* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -194,7 +208,11 @@ __crc32q (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); } -#endif + +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ /* 64bit popcnt */ extern __inline long long diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index b137753a4f5..e825c34a256 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -24,71 +24,43 @@ #ifndef _IMMINTRIN_H_INCLUDED #define _IMMINTRIN_H_INCLUDED -#ifdef __MMX__ #include <mmintrin.h> -#endif -#ifdef __SSE__ #include <xmmintrin.h> -#endif -#ifdef __SSE2__ #include <emmintrin.h> -#endif -#ifdef __SSE3__ #include <pmmintrin.h> -#endif -#ifdef __SSSE3__ #include <tmmintrin.h> -#endif -#if defined (__SSE4_2__) || defined (__SSE4_1__) #include <smmintrin.h> -#endif -#if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> -#endif -#ifdef __AVX__ #include <avxintrin.h> -#endif -#ifdef __AVX2__ #include <avx2intrin.h> -#endif -#ifdef __LZCNT__ #include <lzcntintrin.h> -#endif -#ifdef __BMI__ #include <bmiintrin.h> -#endif -#ifdef __BMI2__ #include <bmi2intrin.h> -#endif -#ifdef __FMA__ #include <fmaintrin.h> -#endif -#ifdef __F16C__ #include <f16cintrin.h> -#endif -#ifdef __RTM__ #include <rtmintrin.h> -#endif -#ifdef __RTM__ #include <xtestintrin.h> -#endif -#ifdef __RDRND__ +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand16_step (unsigned short *__P) @@ -102,10 +74,18 @@ _rdrand32_step (unsigned int *__P) { return __builtin_ia32_rdrand32_step (__P); } -#endif /* __RDRND__ */ +#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ #ifdef __x86_64__ -#ifdef __FSGSBASE__ + +#ifndef __FSGSBASE__ +#pragma GCC push_options +#pragma GCC target("fsgsbase") +#define __DISABLE_FSGSBASE__ +#endif /* __FSGSBASE__ */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _readfsbase_u32 (void) @@ -161,16 +141,27 @@ _writegsbase_u64 (unsigned long long __B) { __builtin_ia32_wrgsbase64 (__B); } -#endif /* __FSGSBASE__ */ - -#ifdef __RDRND__ +#ifdef __DISABLE_FSGSBASE__ +#undef __DISABLE_FSGSBASE__ +#pragma GCC pop_options +#endif /* __DISABLE_FSGSBASE__ */ + +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _rdrand64_step (unsigned long long *__P) { return __builtin_ia32_rdrand64_step (__P); } -#endif /* __RDRND__ */ 
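[Editorial aside, not part of the patch.] The recurring change in these intrinsic headers replaces the old `#error "... instruction set not enabled"` guards with `#pragma GCC push_options` / `#pragma GCC target("...")` plus a `__DISABLE_*__` marker, so the headers can always be included and the ISA is enabled only around their own definitions. The practical effect is that per-function target selection works without global -m flags. A minimal sketch, assuming an x86-64 GCC that carries these header changes; the function names are just examples:

```c
/* Illustrative sketch, not part of the patch.  Compiled without
   -msse4.2, this translation unit can still include the intrinsic
   headers; the SSE4.2 intrinsic is used only inside a function that
   enables the ISA via the target attribute, with a runtime check
   guarding the call.  */
#include <stdio.h>
#include <x86intrin.h>

__attribute__((target ("sse4.2")))
static unsigned int
crc32_sse42 (unsigned int crc, unsigned int value)
{
  return _mm_crc32_u32 (crc, value);
}

static unsigned int
crc32_generic (unsigned int crc, unsigned int value)
{
  /* Placeholder fallback; a real one would implement CRC32C in C.  */
  return crc ^ value;
}

int
main (void)
{
  unsigned int crc = 0xffffffffu;
  if (__builtin_cpu_supports ("sse4.2"))
    crc = crc32_sse42 (crc, 0x12345678u);
  else
    crc = crc32_generic (crc, 0x12345678u);
  printf ("crc = %#x\n", crc);
  return 0;
}
```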
+#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ + #endif /* __x86_64__ */ #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h index 8c708508a80..64ba7321fd9 100644 --- a/gcc/config/i386/lwpintrin.h +++ b/gcc/config/i386/lwpintrin.h @@ -29,8 +29,10 @@ #define _LWPINTRIN_H_INCLUDED #ifndef __LWP__ -# error "LWP instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("lwp") +#define __DISABLE_LWP__ +#endif /* __LWP__ */ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __llwpcb (void *pcbAddress) @@ -95,6 +97,9 @@ __lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags) #endif #endif -#endif /* __LWP__ */ +#ifdef __DISABLE_LWP__ +#undef __DISABLE_LWP__ +#pragma GCC pop_options +#endif /* __DISABLE_LWP__ */ #endif /* _LWPINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h index 9382bb96ecc..22b9ee7999e 100644 --- a/gcc/config/i386/lzcntintrin.h +++ b/gcc/config/i386/lzcntintrin.h @@ -25,13 +25,16 @@ # error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." #endif -#ifndef __LZCNT__ -# error "LZCNT instruction is not enabled" -#endif /* __LZCNT__ */ #ifndef _LZCNTINTRIN_H_INCLUDED #define _LZCNTINTRIN_H_INCLUDED +#ifndef __LZCNT__ +#pragma GCC push_options +#pragma GCC target("lzcnt") +#define __DISABLE_LZCNT__ +#endif /* __LZCNT__ */ + extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __lzcnt16 (unsigned short __X) { @@ -64,4 +67,9 @@ _lzcnt_u64 (unsigned long long __X) } #endif +#ifdef __DISABLE_LZCNT__ +#undef __DISABLE_LZCNT__ +#pragma GCC pop_options +#endif /* __DISABLE_LZCNT__ */ + #endif /* _LZCNTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h index 7e806b701a1..093d5e77932 100644 --- a/gcc/config/i386/mm3dnow.h +++ b/gcc/config/i386/mm3dnow.h @@ -27,11 +27,15 @@ #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED -#ifdef __3dNOW__ - #include <mmintrin.h> #include <prfchwintrin.h> +#ifndef __3dNOW__ +#pragma GCC push_options +#pragma GCC target("3dnow") +#define __DISABLE_3dNOW__ +#endif /* __3dNOW__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_femms (void) { @@ -205,6 +209,10 @@ _m_pswapd (__m64 __A) } #endif /* __3dNOW_A__ */ -#endif /* __3dNOW__ */ + +#ifdef __DISABLE_3dNOW__ +#undef __DISABLE_3dNOW__ +#pragma GCC pop_options +#endif /* __DISABLE_3dNOW__ */ #endif /* _MM3DNOW_H_INCLUDED */ diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index c76203b5477..c0729709373 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -28,8 +28,11 @@ #define _MMINTRIN_H_INCLUDED #ifndef __MMX__ -# error "MMX instruction set not enabled" -#else +#pragma GCC push_options +#pragma GCC target("mmx") +#define __DISABLE_MMX__ +#endif /* __MMX__ */ + /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); @@ -303,13 +306,21 @@ _m_paddd (__m64 __m1, __m64 __m2) } /* Add the 64-bit values in M1 to the 64-bit values in M2. 
*/ -#ifdef __SSE2__ +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_si64 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); } -#endif +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ @@ -407,13 +418,21 @@ _m_psubd (__m64 __m1, __m64 __m2) } /* Add the 64-bit values in M1 to the 64-bit values in M2. */ -#ifdef __SSE2__ +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#define __DISABLE_SSE2__ +#endif /* __SSE2__ */ + extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_si64 (__m64 __m1, __m64 __m2) { return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); } -#endif +#ifdef __DISABLE_SSE2__ +#undef __DISABLE_SSE2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE2__ */ /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed saturating arithmetic. */ @@ -915,6 +934,9 @@ _mm_set1_pi8 (char __b) { return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); } +#ifdef __DISABLE_MMX__ +#undef __DISABLE_MMX__ +#pragma GCC pop_options +#endif /* __DISABLE_MMX__ */ -#endif /* __MMX__ */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/nmmintrin.h b/gcc/config/i386/nmmintrin.h index a4fbed26268..aefe3ef9e90 100644 --- a/gcc/config/i386/nmmintrin.h +++ b/gcc/config/i386/nmmintrin.h @@ -27,11 +27,7 @@ #ifndef _NMMINTRIN_H_INCLUDED #define _NMMINTRIN_H_INCLUDED -#ifndef __SSE4_2__ -# error "SSE4.2 instruction set not enabled" -#else /* We just include SSE4.1 header file. */ #include <smmintrin.h> -#endif /* __SSE4_2__ */ #endif /* _NMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h index 9c6956c1374..2447d5aa31b 100644 --- a/gcc/config/i386/pmmintrin.h +++ b/gcc/config/i386/pmmintrin.h @@ -27,13 +27,15 @@ #ifndef _PMMINTRIN_H_INCLUDED #define _PMMINTRIN_H_INCLUDED -#ifndef __SSE3__ -# error "SSE3 instruction set not enabled" -#else - /* We need definitions from the SSE2 and SSE header files*/ #include <emmintrin.h> +#ifndef __SSE3__ +#pragma GCC push_options +#pragma GCC target("sse3") +#define __DISABLE_SSE3__ +#endif /* __SSE3__ */ + /* Additional bits in the MXCSR. */ #define _MM_DENORMALS_ZERO_MASK 0x0040 #define _MM_DENORMALS_ZERO_ON 0x0040 @@ -122,6 +124,9 @@ _mm_mwait (unsigned int __E, unsigned int __H) __builtin_ia32_mwait (__E, __H); } -#endif /* __SSE3__ */ +#ifdef __DISABLE_SSE3__ +#undef __DISABLE_SSE3__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE3__ */ #endif /* _PMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h index af7efdf5d10..ee3a8e0d076 100644 --- a/gcc/config/i386/popcntintrin.h +++ b/gcc/config/i386/popcntintrin.h @@ -21,13 +21,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __POPCNT__ -# error "POPCNT instruction set not enabled" -#endif /* __POPCNT__ */ - #ifndef _POPCNTINTRIN_H_INCLUDED #define _POPCNTINTRIN_H_INCLUDED +#ifndef __POPCNT__ +#pragma GCC push_options +#pragma GCC target("popcnt") +#define __DISABLE_POPCNT__ +#endif /* __POPCNT__ */ + /* Calculate a number of bits set to 1. 
*/ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_popcnt_u32 (unsigned int __X) @@ -43,4 +45,9 @@ _mm_popcnt_u64 (unsigned long long __X) } #endif +#ifdef __DISABLE_POPCNT__ +#undef __DISABLE_POPCNT__ +#pragma GCC pop_options +#endif /* __DISABLE_POPCNT__ */ + #endif /* _POPCNTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/prfchwintrin.h b/gcc/config/i386/prfchwintrin.h index b8011bb6bd1..73aa4cac7af 100644 --- a/gcc/config/i386/prfchwintrin.h +++ b/gcc/config/i386/prfchwintrin.h @@ -26,17 +26,24 @@ #endif -#if !defined (__PRFCHW__) && !defined (__3dNOW__) -# error "PRFCHW instruction not enabled" -#endif /* __PRFCHW__ or __3dNOW__*/ - #ifndef _PRFCHWINTRIN_H_INCLUDED #define _PRFCHWINTRIN_H_INCLUDED +#ifndef __PRFCHW__ +#pragma GCC push_options +#pragma GCC target("prfchw") +#define __DISABLE_PRFCHW__ +#endif /* __PRFCHW__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _m_prefetchw (void *__P) { __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } +#ifdef __DISABLE_PRFCHW__ +#undef __DISABLE_PRFCHW__ +#pragma GCC pop_options +#endif /* __DISABLE_PRFCHW__ */ + #endif /* _PRFCHWINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h index f30c237a6cb..3d040ab3af6 100644 --- a/gcc/config/i386/rdseedintrin.h +++ b/gcc/config/i386/rdseedintrin.h @@ -25,12 +25,15 @@ # error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead." #endif +#ifndef _RDSEEDINTRIN_H_INCLUDED +#define _RDSEEDINTRIN_H_INCLUDED + #ifndef __RDSEED__ -# error "RDSEED instruction not enabled" +#pragma GCC push_options +#pragma GCC target("rdseed") +#define __DISABLE_RDSEED__ #endif /* __RDSEED__ */ -#ifndef _RDSEEDINTRIN_H_INCLUDED -#define _RDSEEDINTRIN_H_INCLUDED extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -55,4 +58,9 @@ _rdseed64_step (unsigned long long *p) } #endif +#ifdef __DISABLE_RDSEED__ +#undef __DISABLE_RDSEED__ +#pragma GCC pop_options +#endif /* __DISABLE_RDSEED__ */ + #endif /* _RDSEEDINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h index 003a7718db3..eb2812fd82e 100644 --- a/gcc/config/i386/rtmintrin.h +++ b/gcc/config/i386/rtmintrin.h @@ -25,13 +25,15 @@ # error "Never use <rtmintrin.h> directly; include <immintrin.h> instead." #endif -#ifndef __RTM__ -# error "RTM instruction set not enabled" -#endif /* __RTM__ */ - #ifndef _RTMINTRIN_H_INCLUDED #define _RTMINTRIN_H_INCLUDED +#ifndef __RTM__ +#pragma GCC push_options +#pragma GCC target("rtm") +#define __DISABLE_RTM__ +#endif /* __RTM__ */ + #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) @@ -74,4 +76,9 @@ _xabort (const unsigned int imm) #define _xabort(N) __builtin_ia32_xabort (N) #endif /* __OPTIMIZE__ */ +#ifdef __DISABLE_RTM__ +#undef __DISABLE_RTM__ +#pragma GCC pop_options +#endif /* __DISABLE_RTM__ */ + #endif /* _RTMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/slm.md b/gcc/config/i386/slm.md new file mode 100644 index 00000000000..3ac919e372c --- /dev/null +++ b/gcc/config/i386/slm.md @@ -0,0 +1,758 @@ +;; Slivermont(SLM) Scheduling +;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; Silvermont has 2 out-of-order IEC, 2 in-order FEC and 1 in-order MEC. + + +(define_automaton "slm") + +;; EU: Execution Unit +;; Silvermont EUs are connected by port 0 or port 1. + +;; SLM has two ports: port 0 and port 1 connecting to all execution units +(define_cpu_unit "slm-port-0,slm-port-1" "slm") + +(define_cpu_unit "slm-ieu-0, slm-ieu-1, + slm-imul, slm-feu-0, slm-feu-1" + "slm") + +(define_reservation "slm-all-ieu" "(slm-ieu-0 + slm-ieu-1 + slm-imul)") +(define_reservation "slm-all-feu" "(slm-feu-0 + slm-feu-1)") +(define_reservation "slm-all-eu" "(slm-all-ieu + slm-all-feu)") +(define_reservation "slm-fp-0" "(slm-port-0 + slm-feu-0)") + +;; Some EUs have duplicated copied and can be accessed via either +;; port 0 or port 1 +;; (define_reservation "slm-port-either" "(slm-port-0 | slm-port-1)" +(define_reservation "slm-port-dual" "(slm-port-0 + slm-port-1)") + +;;; fmul insn can have 4 or 5 cycles latency +(define_reservation "slm-fmul-5c" + "(slm-port-0 + slm-feu-0), slm-feu-0, nothing*3") +(define_reservation "slm-fmul-4c" "(slm-port-0 + slm-feu-0), nothing*3") + +;;; fadd can has 3 cycles latency depends on instruction forms +(define_reservation "slm-fadd-3c" "(slm-port-1 + slm-feu-1), nothing*2") +(define_reservation "slm-fadd-4c" + "(slm-port-1 + slm-feu-1), slm-feu-1, nothing*2") + +;;; imul insn has 3 cycles latency for SI operands +(define_reservation "slm-imul-32" + "(slm-port-1 + slm-imul), nothing*2") +(define_reservation "slm-imul-mem-32" + "(slm-port-1 + slm-imul + slm-port-0), nothing*2") +;;; imul has 4 cycles latency for DI operands with 1/2 tput +(define_reservation "slm-imul-64" + "(slm-port-1 + slm-imul), slm-imul, nothing*2") + +;;; dual-execution instructions can have 1,2,4,5 cycles latency depends on +;;; instruction forms +(define_reservation "slm-dual-1c" "(slm-port-dual + slm-all-eu)") +(define_reservation "slm-dual-2c" + "(slm-port-dual + slm-all-eu, nothing)") + +;;; Most of simple ALU instructions have 1 cycle latency. Some of them +;;; issue in port 0, some in port 0 and some in either port. +(define_reservation "slm-simple-0" "(slm-port-0 + slm-ieu-0)") +(define_reservation "slm-simple-1" "(slm-port-1 + slm-ieu-1)") +(define_reservation "slm-simple-either" "(slm-simple-0 | slm-simple-1)") + +;;; Complex macro-instruction has variants of latency, and uses both ports. 
+(define_reservation "slm-complex" "(slm-port-dual + slm-all-eu)") + +(define_insn_reservation "slm_other" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "!jeu"))) + "slm-complex, slm-all-eu*8") + +;; return has type "other" with atom_unit "jeu" +(define_insn_reservation "slm_other_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "jeu"))) + "slm-dual-1c") + +(define_insn_reservation "slm_multi" 9 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "multi")) + "slm-complex, slm-all-eu*8") + +;; Normal alu insns without carry +(define_insn_reservation "slm_alu" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Normal alu insns without carry, but use MEC. +(define_insn_reservation "slm_alu_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "slm_alu_carry" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +;; Alu insn consuming CF, such as add/sbb +(define_insn_reservation "slm_alu_carry_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_alu1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))) + "slm-simple-either") + +;; bsf and bsf insn +(define_insn_reservation "slm_alu1_1" 10 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "1"))) + "slm-simple-1, slm-ieu-1*9") + +(define_insn_reservation "slm_alu1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; 16<-16, 32<-32 +(define_insn_reservation "slm_imovx" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 16<-16, 32<-32, mem +(define_insn_reservation "slm_imovx_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 +(define_insn_reservation "slm_imovx_2" 1 + (and (eq_attr "cpu" 
"slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem +(define_insn_reservation "slm_imovx_2_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-0") + +;; 16<-8 +(define_insn_reservation "slm_imovx_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (match_operand:HI 0 "register_operand") + (match_operand:QI 1 "general_operand")))) + "slm-simple-0, nothing*2") + +(define_insn_reservation "slm_lea" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "!HI"))) + "slm-simple-either") + +;; lea 16bit address is complex insn +(define_insn_reservation "slm_lea_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "HI"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_incdec" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_incdec_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing*2") + +;; simple shift instruction use SHIFT eu, none memory +(define_insn_reservation "slm_ishift" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; simple shift instruction use SHIFT eu, memory +(define_insn_reservation "slm_ishift_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; DF shift (prefixed with 0f) is complex insn with latency of 4 cycles +(define_insn_reservation "slm_ishift_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (eq_attr "prefix_0f" "1"))) + "slm-complex, slm-all-eu*3") + +(define_insn_reservation "slm_ishift1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_ishift1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_imul" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) + "slm-imul-32") + +(define_insn_reservation "slm_imul_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr 
"memory" "!none") (eq_attr "mode" "SI")))) + "slm-imul-mem-32") + +;; latency set to 4 as common 64x64 imul with 1/2 tput +(define_insn_reservation "slm_imul_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (eq_attr "mode" "!SI"))) + "slm-imul-64") + +(define_insn_reservation "slm_idiv" 33 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "idiv")) + "slm-complex, slm-all-eu*16, nothing*16") + +(define_insn_reservation "slm_icmp" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_icmp_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_ibr" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "!load"))) + "slm-simple-1") + +;; complex if jump target is from address +(define_insn_reservation "slm_ibr_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_setcc" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "!store"))) + "slm-simple-either") + +;; 2 cycles complex if target is in memory +(define_insn_reservation "slm_setcc_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "store"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_icmov" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "none"))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_icmov_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_push" 2 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "push")) + "slm-dual-2c") + +;; pop r64 is 1 cycle. UCODE if segreg, ignored +(define_insn_reservation "slm_pop" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "DI"))) + "slm-dual-1c") + +;; pop non-r64 is 2 cycles. 
UCODE if segreg, ignored +(define_insn_reservation "slm_pop_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "!DI"))) + "slm-dual-2c") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_call" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "call")) + "slm-dual-1c") + +(define_insn_reservation "slm_callv" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "callv")) + "slm-dual-1c") + +(define_insn_reservation "slm_leave" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "leave")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_str" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "str")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_sselog" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_sselog1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; not pmad, not psad +(define_insn_reservation "slm_sseiadd" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "!simul") + (eq_attr "atom_unit" "!complex"))))) + "slm-simple-either") + +;; pmad, psad and 64 +(define_insn_reservation "slm_sseiadd_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "DI"))))) + "slm-fmul-4c") + +;; pmad, psad and 128 +(define_insn_reservation "slm_sseiadd_3" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "TI"))))) + "slm-fmul-5c") + +;; if paddq(64 bit op), phadd/phsub +(define_insn_reservation "slm_sseiadd_4" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (ior (match_operand:V2DI 0 "register_operand") + (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; if immediate op. +(define_insn_reservation "slm_sseishft" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "!sishuf") + (match_operand 2 "immediate_operand")))) + "slm-simple-either") + +;; if palignr or psrldq +(define_insn_reservation "slm_sseishft_2" 1 + (and (eq_attr "cpu" "slm") + (ior (eq_attr "type" "sseishft1") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "sishuf") + (match_operand 2 "immediate_operand"))))) + "slm-simple-0") + +;; if reg/mem op +(define_insn_reservation "slm_sseishft_3" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (not (match_operand 2 "immediate_operand")))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_sseimul" 5 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "sseimul")) + "slm-fmul-5c") + +;; rcpss or rsqrtss +(define_insn_reservation "slm_sse" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) + "slm-fmul-4c") + +;; movshdup, movsldup. 
Suggest to type sseishft +(define_insn_reservation "slm_sse_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "movdup"))) + "slm-simple-0") + +;; lfence +(define_insn_reservation "slm_sse_3" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "lfence"))) + "slm-simple-either") + +;; sfence,clflush,mfence, prefetch +(define_insn_reservation "slm_sse_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (eq_attr "atom_sse_attr" "fence") + (eq_attr "atom_sse_attr" "prefetch")))) + "slm-simple-0") + +;; rcpps, rsqrtss, sqrt, ldmxcsr +(define_insn_reservation "slm_sse_5" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (ior (eq_attr "atom_sse_attr" "sqrt") + (eq_attr "atom_sse_attr" "mxcsr")) + (and (eq_attr "atom_sse_attr" "rcp") + (eq_attr "mode" "V4SF"))))) + "slm-complex, slm-all-eu*7, nothing") + +;; xmm->xmm +(define_insn_reservation "slm_ssemov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-either") + +;; reg->xmm +(define_insn_reservation "slm_ssemov_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "r")))) + "slm-simple-0") + +;; xmm->reg +(define_insn_reservation "slm_ssemov_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "r") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-0, nothing*2") + +;; mov mem +(define_insn_reservation "slm_ssemov_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) + "slm-simple-0") + +;; movu mem +(define_insn_reservation "slm_ssemov_5" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (ior (eq_attr "movu" "1") (eq_attr "memory" "!none")))) + "slm-simple-0, nothing") + +;; no memory simple +(define_insn_reservation "slm_sseadd" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; memory simple +(define_insn_reservation "slm_sseadd_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "!none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; maxps, minps, *pd, hadd, hsub +(define_insn_reservation "slm_sseadd_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; Except dppd/dpps +(define_insn_reservation "slm_ssemul" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "!SF"))) + "slm-fmul-5c") + +;; Except dppd/dpps, 4 cycle if mulss +(define_insn_reservation "slm_ssemul_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF"))) + "slm-fmul-4c") + +(define_insn_reservation "slm_ssecmp" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecmp")) + "slm-simple-either") + +(define_insn_reservation "slm_ssecomi" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecomi")) + "slm-simple-0") + +;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 
"register_operand") + (match_operand:V4SF 1 "register_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "register_operand"))))) + "slm-fp-0, slm-feu-0, nothing*3") + +;; memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt_mem" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "memory_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand"))))) +"slm-fp-0, slm-feu-0, nothing*3") + +;; cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "register_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "register_operand"))))) + "slm-fp-0, slm-feu-0") + +;; memory and cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "memory_operand"))))) + "slm-fp-0, slm-feu-0") + +;; otherwise. 4 cycles average for cvtss2sd +(define_insn_reservation "slm_ssecvt_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (not (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "nonimmediate_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "nonimmediate_operand")))))) + "slm-fp-0, nothing*3") + +;; memory and cvtsi2sd +(define_insn_reservation "slm_sseicvt" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "nonimmediate_operand")))) + "slm-fp-0") + +;; otherwise. 
8 cycles average for cvtsd2si +(define_insn_reservation "slm_sseicvt_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (not (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand"))))) + "slm-fp-0, nothing*3") + +(define_insn_reservation "slm_ssediv" 13 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssediv")) + "slm-fp-0, slm-feu-0*10, nothing*2") + +;; simple for fmov +(define_insn_reservation "slm_fmov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +;; simple for fmov +(define_insn_reservation "slm_fmov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +;; Define bypass here + +;; There will be 0 cycle stall from cmp/test to jcc + +;; There will be 1 cycle stall from flag producer to cmov and adc/sbb +(define_bypass 2 "slm_icmp, slm_test, slm_alu, slm_alu_carry, + slm_alu1, slm_negnot, slm_incdec, slm_ishift, + slm_ishift1, slm_rotate, slm_rotate1" + "slm_icmov, slm_alu_carry") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "slm_lea" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "slm_alu_carry, + slm_alu,slm_alu1,slm_negnot,slm_imov,slm_imovx, + slm_incdec,slm_ishift,slm_ishift1,slm_rotate, + slm_rotate1, slm_setcc, slm_icmov, slm_pop, + slm_alu_mem, slm_alu_carry_mem, slm_alu1_mem, + slm_imovx_mem, slm_imovx_2_mem, + slm_imov_mem, slm_icmov_mem, slm_fmov_mem" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1, + slm_ishift_mem, slm_ishift1_mem, + slm_rotate_mem, slm_rotate1_mem" + "ix86_dep_by_shift_count") diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 3ae916ce5d3..20fa2ca2f94 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -27,14 +27,16 @@ #ifndef _SMMINTRIN_H_INCLUDED #define _SMMINTRIN_H_INCLUDED -#ifndef __SSE4_1__ -# error "SSE4.1 instruction set not enabled" -#else - /* We need definitions from the SSSE3, SSE3, SSE2 and SSE header files. */ #include <tmmintrin.h> +#ifndef __SSE4_1__ +#pragma GCC push_options +#pragma GCC target("sse4.1") +#define __DISABLE_SSE4_1__ +#endif /* __SSE4_1__ */ + /* Rounding mode macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 @@ -582,7 +584,11 @@ _mm_stream_load_si128 (__m128i *__X) return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X); } -#ifdef __SSE4_2__ +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_2__ */ /* These macros specify the source data format. */ #define _SIDD_UBYTE_OPS 0x00 @@ -792,9 +798,29 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); } -#ifdef __POPCNT__ +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ + +#ifdef __DISABLE_SSE4_1__ +#undef __DISABLE_SSE4_1__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_1__ */ + #include <popcntintrin.h> -#endif + +#ifndef __SSE4_1__ +#pragma GCC push_options +#pragma GCC target("sse4.1") +#define __DISABLE_SSE4_1__ +#endif /* __SSE4_1__ */ + +#ifndef __SSE4_2__ +#pragma GCC push_options +#pragma GCC target("sse4.2") +#define __DISABLE_SSE4_2__ +#endif /* __SSE4_1__ */ /* Accumulate CRC32 (polynomial 0x11EDC6F41) value. 
*/ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -823,8 +849,14 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) } #endif -#endif /* __SSE4_2__ */ +#ifdef __DISABLE_SSE4_2__ +#undef __DISABLE_SSE4_2__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_2__ */ -#endif /* __SSE4_1__ */ +#ifdef __DISABLE_SSE4_1__ +#undef __DISABLE_SSE4_1__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE4_1__ */ #endif /* _SMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index f615ad7a2af..ba076a7f49a 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -22,7 +22,7 @@ LIMITS_H_TEST = true winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ - $(TM_P_H) $(HASHTAB_H) $(GGC_H) $(LTO_STREAMER_H) + $(TM_P_H) $(HASH_TABLE_H) $(GGC_H) $(LTO_STREAMER_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/i386/winnt.c diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index 43443e72a45..4d7b5987037 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -18,7 +18,7 @@ winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ - $(TM_P_H) $(HASHTAB_H) $(GGC_H) + $(TM_P_H) $(HASH_TABLE_H) $(GGC_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/i386/winnt.c diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h index 07c4f77fdd6..9235d6c713d 100644 --- a/gcc/config/i386/tbmintrin.h +++ b/gcc/config/i386/tbmintrin.h @@ -25,13 +25,15 @@ # error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." 
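[Editorial aside, not part of the patch.] The popcntintrin.h and smmintrin.h hunks above show the other half of the scheme: headers are included unconditionally and each one wraps its own definitions in push_options/target, popping and re-pushing around nested includes. User code can lean on the same idiom directly. A minimal sketch, assuming a GCC with these header changes and a CPU that actually supports POPCNT at run time:

```c
/* Illustrative sketch, not part of the patch.  The file as a whole can
   be built without -mpopcnt; the push_options/target region enables
   POPCNT only for the helper below, mirroring what the headers now do
   around their own intrinsics.  */
#include <popcntintrin.h>

#pragma GCC push_options
#pragma GCC target ("popcnt")

static int
popcount32 (unsigned int x)
{
  return _mm_popcnt_u32 (x);
}

#pragma GCC pop_options

int
main (void)
{
  return popcount32 (0xffu) == 8 ? 0 : 1;
}
```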
#endif -#ifndef __TBM__ -# error "TBM instruction set not enabled" -#endif /* __TBM__ */ - #ifndef _TBMINTRIN_H_INCLUDED #define _TBMINTRIN_H_INCLUDED +#ifndef __TBM__ +#pragma GCC push_options +#pragma GCC target("tbm") +#define __DISABLE_TBM__ +#endif /* __TBM__ */ + #ifdef __OPTIMIZE__ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __bextri_u32 (unsigned int __X, const unsigned int __I) @@ -169,4 +171,10 @@ __tzmsk_u64 (unsigned long long __X) #endif /* __x86_64__ */ + +#ifdef __DISABLE_TBM__ +#undef __DISABLE_TBM__ +#pragma GCC pop_options +#endif /* __DISABLE_TBM__ */ + #endif /* _TBMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index 767b199d3c4..3f63b4f8934 100644 --- a/gcc/config/i386/tmmintrin.h +++ b/gcc/config/i386/tmmintrin.h @@ -27,13 +27,15 @@ #ifndef _TMMINTRIN_H_INCLUDED #define _TMMINTRIN_H_INCLUDED -#ifndef __SSSE3__ -# error "SSSE3 instruction set not enabled" -#else - /* We need definitions from the SSE3, SSE2 and SSE header files*/ #include <pmmintrin.h> +#ifndef __SSSE3__ +#pragma GCC push_options +#pragma GCC target("ssse3") +#define __DISABLE_SSSE3__ +#endif /* __SSSE3__ */ + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_epi16 (__m128i __X, __m128i __Y) { @@ -239,6 +241,9 @@ _mm_abs_pi32 (__m64 __X) return (__m64) __builtin_ia32_pabsd ((__v2si)__X); } -#endif /* __SSSE3__ */ +#ifdef __DISABLE_SSSE3__ +#undef __DISABLE_SSSE3__ +#pragma GCC pop_options +#endif /* __DISABLE_SSSE3__ */ #endif /* _TMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c index f0f972c56d9..c9e3aa98a37 100644 --- a/gcc/config/i386/winnt.c +++ b/gcc/config/i386/winnt.c @@ -30,7 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "flags.h" #include "tm_p.h" #include "diagnostic-core.h" -#include "hashtab.h" +#include "hash-table.h" #include "langhooks.h" #include "ggc.h" #include "target.h" @@ -449,7 +449,7 @@ i386_pe_reloc_rw_mask (void) unsigned int i386_pe_section_type_flags (tree decl, const char *name, int reloc) { - static htab_t htab; + static hash_table <pointer_hash <unsigned int> > htab; unsigned int flags; unsigned int **slot; @@ -460,8 +460,8 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc) /* The names we put in the hashtable will always be the unique versions given to us by the stringtable, so we can just use their addresses as the keys. */ - if (!htab) - htab = htab_create (31, htab_hash_pointer, htab_eq_pointer, NULL); + if (!htab.is_created ()) + htab.create (31); if (decl && TREE_CODE (decl) == FUNCTION_DECL) flags = SECTION_CODE; @@ -480,7 +480,7 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc) flags |= SECTION_LINKONCE; /* See if we already have an entry for this section. */ - slot = (unsigned int **) htab_find_slot (htab, name, INSERT); + slot = htab.find_slot ((unsigned int *)name, INSERT); if (!*slot) { *slot = (unsigned int *) xmalloc (sizeof (unsigned int)); @@ -714,12 +714,29 @@ i386_pe_record_stub (const char *name) #ifdef CXX_WRAP_SPEC_LIST +/* Hashtable helpers. 
*/ + +struct wrapped_symbol_hasher : typed_noop_remove <char> +{ + typedef char value_type; + typedef char compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline hashval_t +wrapped_symbol_hasher::hash (const value_type *v) +{ + return htab_hash_string (v); +} + /* Hash table equality helper function. */ -static int -wrapper_strcmp (const void *x, const void *y) +inline bool +wrapped_symbol_hasher::equal (const value_type *x, const compare_type *y) { - return !strcmp ((const char *) x, (const char *) y); + return !strcmp (x, y); } /* Search for a function named TARGET in the list of library wrappers @@ -733,7 +750,7 @@ static const char * i386_find_on_wrapper_list (const char *target) { static char first_time = 1; - static htab_t wrappers; + static hash_table <wrapped_symbol_hasher> wrappers; if (first_time) { @@ -746,8 +763,7 @@ i386_find_on_wrapper_list (const char *target) char *bufptr; /* Breaks up the char array into separated strings strings and enter them into the hash table. */ - wrappers = htab_create_alloc (8, htab_hash_string, wrapper_strcmp, - 0, xcalloc, free); + wrappers.create (8); for (bufptr = wrapper_list_buffer; *bufptr; ++bufptr) { char *found = NULL; @@ -760,12 +776,12 @@ i386_find_on_wrapper_list (const char *target) if (*bufptr) *bufptr = 0; if (found) - *htab_find_slot (wrappers, found, INSERT) = found; + *wrappers.find_slot (found, INSERT) = found; } first_time = 0; } - return (const char *) htab_find (wrappers, target); + return wrappers.find (target); } #endif /* CXX_WRAP_SPEC_LIST */ diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h index 93c24f41ce6..defcfd82acc 100644 --- a/gcc/config/i386/wmmintrin.h +++ b/gcc/config/i386/wmmintrin.h @@ -30,13 +30,14 @@ /* We need definitions from the SSE2 header file. */ #include <emmintrin.h> -#if !defined (__AES__) && !defined (__PCLMUL__) -# error "AES/PCLMUL instructions not enabled" -#else - /* AES */ -#ifdef __AES__ +#ifndef __AES__ +#pragma GCC push_options +#pragma GCC target("aes") +#define __DISABLE_AES__ +#endif /* __AES__ */ + /* Performs 1 round of AES decryption of the first m128i using the second m128i as a round key. */ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -92,11 +93,20 @@ _mm_aeskeygenassist_si128 (__m128i __X, const int __C) ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \ (int)(C))) #endif -#endif /* __AES__ */ + +#ifdef __DISABLE_AES__ +#undef __DISABLE_AES__ +#pragma GCC pop_options +#endif /* __DISABLE_AES__ */ /* PCLMUL */ -#ifdef __PCLMUL__ +#ifndef __PCLMUL__ +#pragma GCC push_options +#pragma GCC target("pclmul") +#define __DISABLE_PCLMUL__ +#endif /* __PCLMUL__ */ + /* Performs carry-less integer multiplication of 64-bit halves of 128-bit input operands. The third parameter inducates which 64-bit haves of the input parameters v1 and v2 should be used. 
It must be @@ -113,8 +123,10 @@ _mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I) ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)(I))) #endif -#endif /* __PCLMUL__ */ -#endif /* __AES__/__PCLMUL__ */ +#ifdef __DISABLE_PCLMUL__ +#undef __DISABLE_PCLMUL__ +#pragma GCC pop_options +#endif /* __DISABLE_PCLMUL__ */ #endif /* _WMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 5bf29d5d361..46ced969a9f 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -26,96 +26,52 @@ #include <ia32intrin.h> -#ifdef __MMX__ #include <mmintrin.h> -#endif -#ifdef __SSE__ #include <xmmintrin.h> -#endif -#ifdef __SSE2__ #include <emmintrin.h> -#endif -#ifdef __SSE3__ #include <pmmintrin.h> -#endif -#ifdef __SSSE3__ #include <tmmintrin.h> -#endif -#ifdef __SSE4A__ #include <ammintrin.h> -#endif -#if defined (__SSE4_2__) || defined (__SSE4_1__) #include <smmintrin.h> -#endif -#if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> -#endif /* For including AVX instructions */ #include <immintrin.h> -#ifdef __3dNOW__ #include <mm3dnow.h> -#endif -#ifdef __FMA4__ #include <fma4intrin.h> -#endif -#ifdef __XOP__ #include <xopintrin.h> -#endif -#ifdef __LWP__ #include <lwpintrin.h> -#endif -#ifdef __BMI__ #include <bmiintrin.h> -#endif -#ifdef __BMI2__ #include <bmi2intrin.h> -#endif -#ifdef __TBM__ #include <tbmintrin.h> -#endif -#ifdef __LZCNT__ #include <lzcntintrin.h> -#endif -#ifdef __POPCNT__ #include <popcntintrin.h> -#endif -#ifdef __RDSEED__ #include <rdseedintrin.h> -#endif -#ifdef __PRFCHW__ #include <prfchwintrin.h> -#endif -#ifdef __FXSR__ #include <fxsrintrin.h> -#endif -#ifdef __XSAVE__ #include <xsaveintrin.h> -#endif -#ifdef __XSAVEOPT__ #include <xsaveoptintrin.h> -#endif #include <adxintrin.h> diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index a223562490e..14d1e7fe2b0 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -27,16 +27,18 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED -#ifndef __SSE__ -# error "SSE instruction set not enabled" -#else - /* We need type definitions from the MMX header file. */ #include <mmintrin.h> /* Get _mm_malloc () and _mm_free (). */ #include <mm_malloc.h> +#ifndef __SSE__ +#pragma GCC push_options +#pragma GCC target("sse") +#define __DISABLE_SSE__ +#endif /* __SSE__ */ + /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); @@ -1242,9 +1244,11 @@ do { \ } while (0) /* For backward source compatibility. */ -#ifdef __SSE2__ # include <emmintrin.h> -#endif -#endif /* __SSE__ */ +#ifdef __DISABLE_SSE__ +#undef __DISABLE_SSE__ +#pragma GCC pop_options +#endif /* __DISABLE_SSE__ */ + #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h index 66b0f0de5c9..e0d148a0818 100644 --- a/gcc/config/i386/xopintrin.h +++ b/gcc/config/i386/xopintrin.h @@ -28,12 +28,14 @@ #ifndef _XOPMMINTRIN_H_INCLUDED #define _XOPMMINTRIN_H_INCLUDED -#ifndef __XOP__ -# error "XOP instruction set not enabled" -#else - #include <fma4intrin.h> +#ifndef __XOP__ +#pragma GCC push_options +#pragma GCC target("xop") +#define __DISABLE_XOP__ +#endif /* __XOP__ */ + /* Integer multiply/add intructions. 
*/ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) @@ -830,6 +832,9 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) (int)(I))) #endif /* __OPTIMIZE__ */ -#endif /* __XOP__ */ +#ifdef __DISABLE_XOP__ +#undef __DISABLE_XOP__ +#pragma GCC pop_options +#endif /* __DISABLE_XOP__ */ #endif /* _XOPMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h index f5665894084..31c17b1d2c5 100644 --- a/gcc/config/i386/xsaveintrin.h +++ b/gcc/config/i386/xsaveintrin.h @@ -28,6 +28,12 @@ #ifndef _XSAVEINTRIN_H_INCLUDED #define _XSAVEINTRIN_H_INCLUDED +#ifndef __XSAVE__ +#pragma GCC push_options +#pragma GCC target("xsave") +#define __DISABLE_XSAVE__ +#endif /* __XSAVE__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsave (void *__P, long long __M) @@ -58,4 +64,9 @@ _xrstor64 (void *__P, long long __M) } #endif +#ifdef __DISABLE_XSAVE__ +#undef __DISABLE_XSAVE__ +#pragma GCC pop_options +#endif /* __DISABLE_XSAVE__ */ + #endif /* _XSAVEINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h index 0d73e341f3f..aa9538da33e 100644 --- a/gcc/config/i386/xsaveoptintrin.h +++ b/gcc/config/i386/xsaveoptintrin.h @@ -28,6 +28,12 @@ #ifndef _XSAVEOPTINTRIN_H_INCLUDED #define _XSAVEOPTINTRIN_H_INCLUDED +#ifndef __XSAVEOPT__ +#pragma GCC push_options +#pragma GCC target("xsaveopt") +#define __DISABLE_XSAVEOPT__ +#endif /* __XSAVEOPT__ */ + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _xsaveopt (void *__P, long long __M) @@ -44,4 +50,9 @@ _xsaveopt64 (void *__P, long long __M) } #endif +#ifdef __DISABLE_XSAVEOPT__ +#undef __DISABLE_XSAVEOPT__ +#pragma GCC pop_options +#endif /* __DISABLE_XSAVEOPT__ */ + #endif /* _XSAVEOPTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h index c82fb7a61ae..a6afa896b4f 100644 --- a/gcc/config/i386/xtestintrin.h +++ b/gcc/config/i386/xtestintrin.h @@ -25,13 +25,15 @@ # error "Never use <xtestintrin.h> directly; include <immintrin.h> instead." #endif -#ifndef __RTM__ -# error "RTM instruction set not enabled" -#endif /* __RTM__ */ - #ifndef _XTESTINTRIN_H_INCLUDED #define _XTESTINTRIN_H_INCLUDED +#ifndef __RTM__ +#pragma GCC push_options +#pragma GCC target("rtm") +#define __DISABLE_RTM__ +#endif /* __RTM__ */ + /* Return non-zero if the instruction executes inside an RTM or HLE code region. Return zero otherwise. */ extern __inline int @@ -41,4 +43,9 @@ _xtest (void) return __builtin_ia32_xtest (); } +#ifdef __DISABLE_RTM__ +#undef __DISABLE_RTM__ +#pragma GCC pop_options +#endif /* __DISABLE_RTM__ */ + #endif /* _XTESTINTRIN_H_INCLUDED */ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 144cf7ee5ee..a128b19c7ca 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. 
If not see #include "target-def.h" #include "common/common-target.h" #include "tm_p.h" -#include "hashtab.h" +#include "hash-table.h" #include "langhooks.h" #include "gimple.h" #include "intl.h" @@ -170,7 +170,7 @@ static ds_t ia64_get_insn_spec_ds (rtx); static ds_t ia64_get_insn_checked_ds (rtx); static bool ia64_skip_rtx_p (const_rtx); static int ia64_speculate_insn (rtx, ds_t, rtx *); -static bool ia64_needs_block_p (int); +static bool ia64_needs_block_p (ds_t); static rtx ia64_gen_spec_check (rtx, rtx, ds_t); static int ia64_spec_check_p (rtx); static int ia64_spec_check_src_p (rtx); @@ -257,8 +257,6 @@ static struct bundle_state *get_free_bundle_state (void); static void free_bundle_state (struct bundle_state *); static void initiate_bundle_states (void); static void finish_bundle_states (void); -static unsigned bundle_state_hash (const void *); -static int bundle_state_eq_p (const void *, const void *); static int insert_bundle_state (struct bundle_state *); static void initiate_bundle_state_table (void); static void finish_bundle_state_table (void); @@ -8341,9 +8339,7 @@ ia64_needs_block_p (ds_t ts) return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc); } -/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN. - If (LABEL != 0 || MUTATE_P), generate branchy recovery check. - Otherwise, generate a simple check. */ +/* Generate (or regenerate) a recovery check for INSN. */ static rtx ia64_gen_spec_check (rtx insn, rtx label, ds_t ds) { @@ -8528,18 +8524,21 @@ finish_bundle_states (void) } } -/* Hash table of the bundle states. The key is dfa_state and insn_num - of the bundle states. */ +/* Hashtable helpers. */ -static htab_t bundle_state_table; +struct bundle_state_hasher : typed_noop_remove <bundle_state> +{ + typedef bundle_state value_type; + typedef bundle_state compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; /* The function returns hash of BUNDLE_STATE. */ -static unsigned -bundle_state_hash (const void *bundle_state) +inline hashval_t +bundle_state_hasher::hash (const value_type *state) { - const struct bundle_state *const state - = (const struct bundle_state *) bundle_state; unsigned result, i; for (result = i = 0; i < dfa_state_size; i++) @@ -8550,19 +8549,20 @@ bundle_state_hash (const void *bundle_state) /* The function returns nonzero if the bundle state keys are equal. */ -static int -bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2) +inline bool +bundle_state_hasher::equal (const value_type *state1, + const compare_type *state2) { - const struct bundle_state *const state1 - = (const struct bundle_state *) bundle_state_1; - const struct bundle_state *const state2 - = (const struct bundle_state *) bundle_state_2; - return (state1->insn_num == state2->insn_num && memcmp (state1->dfa_state, state2->dfa_state, dfa_state_size) == 0); } +/* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ + +static hash_table <bundle_state_hasher> bundle_state_table; + /* The function inserts the BUNDLE_STATE into the hash table. The function returns nonzero if the bundle has been inserted into the table. The table contains the best bundle state with given key. 
*/ @@ -8570,39 +8570,35 @@ bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2) static int insert_bundle_state (struct bundle_state *bundle_state) { - void **entry_ptr; + struct bundle_state **entry_ptr; - entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT); + entry_ptr = bundle_state_table.find_slot (bundle_state, INSERT); if (*entry_ptr == NULL) { bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; index_to_bundle_states [bundle_state->insn_num] = bundle_state; - *entry_ptr = (void *) bundle_state; + *entry_ptr = bundle_state; return TRUE; } - else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost - || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost - && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num + else if (bundle_state->cost < (*entry_ptr)->cost + || (bundle_state->cost == (*entry_ptr)->cost + && ((*entry_ptr)->accumulated_insns_num > bundle_state->accumulated_insns_num - || (((struct bundle_state *) - *entry_ptr)->accumulated_insns_num + || ((*entry_ptr)->accumulated_insns_num == bundle_state->accumulated_insns_num - && (((struct bundle_state *) - *entry_ptr)->branch_deviation + && ((*entry_ptr)->branch_deviation > bundle_state->branch_deviation - || (((struct bundle_state *) - *entry_ptr)->branch_deviation + || ((*entry_ptr)->branch_deviation == bundle_state->branch_deviation - && ((struct bundle_state *) - *entry_ptr)->middle_bundle_stops + && (*entry_ptr)->middle_bundle_stops > bundle_state->middle_bundle_stops)))))) { struct bundle_state temp; - temp = *(struct bundle_state *) *entry_ptr; - *(struct bundle_state *) *entry_ptr = *bundle_state; - ((struct bundle_state *) *entry_ptr)->next = temp.next; + temp = **entry_ptr; + **entry_ptr = *bundle_state; + (*entry_ptr)->next = temp.next; *bundle_state = temp; } return FALSE; @@ -8613,8 +8609,7 @@ insert_bundle_state (struct bundle_state *bundle_state) static void initiate_bundle_state_table (void) { - bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p, - (htab_del) 0); + bundle_state_table.create (50); } /* Finish work with the hash table. */ @@ -8622,7 +8617,7 @@ initiate_bundle_state_table (void) static void finish_bundle_state_table (void) { - htab_delete (bundle_state_table); + bundle_state_table.dispose (); } diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index 5c3ac644be3..b009cdf2bc5 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -24,4 +24,5 @@ ia64-c.o: $(srcdir)/config/ia64/ia64-c.c $(CONFIG_H) $(SYSTEM_H) \ # genattrtab generates very long string literals. insn-attrtab.o-warn = -Wno-error -ia64.o: debug.h $(PARAMS_H) sel-sched.h reload.h $(OPTS_H) dumpfile.h +ia64.o: $(srcdir)/config/ia64/ia64.c debug.h $(PARAMS_H) sel-sched.h reload.h \ + $(OPTS_H) dumpfile.h $(HASH_TABLE_H) diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md index ddef8cc495d..1fe6119d075 100644 --- a/gcc/config/mips/constraints.md +++ b/gcc/config/mips/constraints.md @@ -92,6 +92,12 @@ ;; but the DSP version allows any accumulator target. (define_register_constraint "ka" "ISA_HAS_DSP_MULT ? ACC_REGS : MD_REGS") +;; The register class to use for an allocatable division result. +;; MIPS16 uses M16_REGS because LO is fixed. +(define_register_constraint "kl" + "TARGET_MIPS16 ? M16_REGS : TARGET_BIG_ENDIAN ? 
MD1_REG : MD0_REG" + "@internal") + (define_constraint "kf" "@internal" (match_operand 0 "force_to_mem_operand")) diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def index 9e5fd162189..a1c65915f78 100644 --- a/gcc/config/mips/mips-cpus.def +++ b/gcc/config/mips/mips-cpus.def @@ -43,7 +43,7 @@ MIPS_CPU ("mips4", PROCESSOR_R8000, 4, 0) that to a recommendation to avoid the instructions in code that isn't tuned to a specific processor. */ MIPS_CPU ("mips32", PROCESSOR_4KC, 32, PTF_AVOID_BRANCHLIKELY) -MIPS_CPU ("mips32r2", PROCESSOR_M4K, 33, PTF_AVOID_BRANCHLIKELY) +MIPS_CPU ("mips32r2", PROCESSOR_74KF2_1, 33, PTF_AVOID_BRANCHLIKELY) MIPS_CPU ("mips64", PROCESSOR_5KC, 64, PTF_AVOID_BRANCHLIKELY) /* ??? For now just tune the generic MIPS64r2 for 5KC as well. */ MIPS_CPU ("mips64r2", PROCESSOR_5KC, 65, PTF_AVOID_BRANCHLIKELY) @@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0) MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0) MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0) MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0) +MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0) /* ST Loongson 2E/2F processors. */ MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY) MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY) @@ -94,6 +95,8 @@ MIPS_CPU ("4ksc", PROCESSOR_4KC, 32, 0) MIPS_CPU ("m4k", PROCESSOR_M4K, 33, 0) MIPS_CPU ("m14kc", PROCESSOR_M4K, 33, 0) MIPS_CPU ("m14k", PROCESSOR_M4K, 33, 0) +MIPS_CPU ("m14ke", PROCESSOR_M4K, 33, 0) +MIPS_CPU ("m14kec", PROCESSOR_M4K, 33, 0) MIPS_CPU ("4kec", PROCESSOR_4KC, 33, 0) MIPS_CPU ("4kem", PROCESSOR_4KC, 33, 0) MIPS_CPU ("4kep", PROCESSOR_4KP, 33, 0) diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md index 002c9992001..49a08689638 100644 --- a/gcc/config/mips/mips-dsp.md +++ b/gcc/config/mips/mips-dsp.md @@ -1131,8 +1131,7 @@ "ISA_HAS_L<SHORT:SIZE><U>X" "l<SHORT:size><u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_expand "mips_lhx" [(match_operand:SI 0 "register_operand") @@ -1165,8 +1164,7 @@ "ISA_HAS_L<GPR:SIZE>X" "l<GPR:size>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "<GPR:MODE>") - (set_attr "length" "4")]) + (set_attr "mode" "<GPR:MODE>")]) (define_insn "*mips_lw<u>x_<P:mode>_ext" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1176,8 +1174,7 @@ "ISA_HAS_LW<U>X && TARGET_64BIT" "lw<u>x\t%0,%2(%1)" [(set_attr "type" "load") - (set_attr "mode" "DI") - (set_attr "length" "4")]) + (set_attr "mode" "DI")]) ;; Table 2-8. 
MIPS DSP ASE Instructions: Branch ;; BPOSGE32 diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 9c70cc4324f..a22c7829b77 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -481,7 +481,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) (define_insn_and_split "mips_cabs_cond_4s" @@ -510,7 +510,7 @@ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8); } [(set_attr "type" "fcmp") - (set_attr "length" "8") + (set_attr "insn_count" "2") (set_attr "mode" "FPSW")]) diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt index 0d7fa26510d..409356e1af7 100644 --- a/gcc/config/mips/mips-tables.opt +++ b/gcc/config/mips/mips-tables.opt @@ -208,425 +208,437 @@ EnumValue Enum(mips_arch_opt_value) String(4700) Value(22) EnumValue -Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical +Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical +Enum(mips_arch_opt_value) String(5900) Value(23) EnumValue -Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical +Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical EnumValue -Enum(mips_arch_opt_value) String(r8k) Value(25) +Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical EnumValue -Enum(mips_arch_opt_value) String(8000) Value(25) +Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical EnumValue -Enum(mips_arch_opt_value) String(8k) Value(25) +Enum(mips_arch_opt_value) String(r8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical +Enum(mips_arch_opt_value) String(8000) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r10k) Value(26) +Enum(mips_arch_opt_value) String(8k) Value(26) EnumValue -Enum(mips_arch_opt_value) String(10000) Value(26) +Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical EnumValue -Enum(mips_arch_opt_value) String(10k) Value(26) +Enum(mips_arch_opt_value) String(r10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical +Enum(mips_arch_opt_value) String(10000) Value(27) EnumValue -Enum(mips_arch_opt_value) String(r12k) Value(27) +Enum(mips_arch_opt_value) String(10k) Value(27) EnumValue -Enum(mips_arch_opt_value) String(12000) Value(27) +Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical EnumValue -Enum(mips_arch_opt_value) String(12k) Value(27) +Enum(mips_arch_opt_value) String(r12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical +Enum(mips_arch_opt_value) String(12000) Value(28) EnumValue -Enum(mips_arch_opt_value) String(r14k) Value(28) +Enum(mips_arch_opt_value) String(12k) Value(28) EnumValue -Enum(mips_arch_opt_value) String(14000) Value(28) +Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical EnumValue -Enum(mips_arch_opt_value) String(14k) Value(28) +Enum(mips_arch_opt_value) String(r14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical +Enum(mips_arch_opt_value) String(14000) Value(29) EnumValue -Enum(mips_arch_opt_value) String(r16k) Value(29) +Enum(mips_arch_opt_value) String(14k) Value(29) EnumValue -Enum(mips_arch_opt_value) String(16000) Value(29) +Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical EnumValue -Enum(mips_arch_opt_value) String(16k) Value(29) +Enum(mips_arch_opt_value) String(r16k) 
Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical +Enum(mips_arch_opt_value) String(16000) Value(30) EnumValue -Enum(mips_arch_opt_value) String(vr5k) Value(30) +Enum(mips_arch_opt_value) String(16k) Value(30) EnumValue -Enum(mips_arch_opt_value) String(5000) Value(30) +Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical EnumValue -Enum(mips_arch_opt_value) String(5k) Value(30) +Enum(mips_arch_opt_value) String(vr5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5000) Value(30) +Enum(mips_arch_opt_value) String(5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5k) Value(30) +Enum(mips_arch_opt_value) String(5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical +Enum(mips_arch_opt_value) String(r5000) Value(31) EnumValue -Enum(mips_arch_opt_value) String(5400) Value(31) +Enum(mips_arch_opt_value) String(r5k) Value(31) EnumValue -Enum(mips_arch_opt_value) String(r5400) Value(31) +Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical EnumValue -Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical +Enum(mips_arch_opt_value) String(5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(5500) Value(32) +Enum(mips_arch_opt_value) String(r5400) Value(32) EnumValue -Enum(mips_arch_opt_value) String(r5500) Value(32) +Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical EnumValue -Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical +Enum(mips_arch_opt_value) String(5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(rm7k) Value(33) +Enum(mips_arch_opt_value) String(r5500) Value(33) EnumValue -Enum(mips_arch_opt_value) String(7000) Value(33) +Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical EnumValue -Enum(mips_arch_opt_value) String(7k) Value(33) +Enum(mips_arch_opt_value) String(rm7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7000) Value(33) +Enum(mips_arch_opt_value) String(7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r7k) Value(33) +Enum(mips_arch_opt_value) String(7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical +Enum(mips_arch_opt_value) String(r7000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(rm9k) Value(34) +Enum(mips_arch_opt_value) String(r7k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(9000) Value(34) +Enum(mips_arch_opt_value) String(rm9000) Value(35) Canonical EnumValue -Enum(mips_arch_opt_value) String(9k) Value(34) +Enum(mips_arch_opt_value) String(rm9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9000) Value(34) +Enum(mips_arch_opt_value) String(9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r9k) Value(34) +Enum(mips_arch_opt_value) String(9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical +Enum(mips_arch_opt_value) String(r9000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r4kc) Value(35) +Enum(mips_arch_opt_value) String(r9k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(4km) Value(36) Canonical +Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4km) Value(36) +Enum(mips_arch_opt_value) String(r4kc) Value(36) EnumValue -Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical +Enum(mips_arch_opt_value) String(4km) Value(37) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kp) Value(37) +Enum(mips_arch_opt_value) String(r4km) Value(37) EnumValue -Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical 
+Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksc) Value(38) +Enum(mips_arch_opt_value) String(r4kp) Value(38) EnumValue -Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical +Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical +Enum(mips_arch_opt_value) String(r4ksc) Value(39) EnumValue -Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical +Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical +Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kec) Value(42) +Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical +Enum(mips_arch_opt_value) String(m14ke) Value(43) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kem) Value(43) +Enum(mips_arch_opt_value) String(m14kec) Value(44) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical +Enum(mips_arch_opt_value) String(4kec) Value(45) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kep) Value(44) +Enum(mips_arch_opt_value) String(r4kec) Value(45) EnumValue -Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical +Enum(mips_arch_opt_value) String(4kem) Value(46) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4ksd) Value(45) +Enum(mips_arch_opt_value) String(r4kem) Value(46) EnumValue -Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical +Enum(mips_arch_opt_value) String(4kep) Value(47) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kc) Value(46) +Enum(mips_arch_opt_value) String(r4kep) Value(47) EnumValue -Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical +Enum(mips_arch_opt_value) String(4ksd) Value(48) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf2_1) Value(47) +Enum(mips_arch_opt_value) String(r4ksd) Value(48) EnumValue -Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical +Enum(mips_arch_opt_value) String(24kc) Value(49) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf) Value(48) +Enum(mips_arch_opt_value) String(r24kc) Value(49) EnumValue -Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical +Enum(mips_arch_opt_value) String(24kf2_1) Value(50) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kf1_1) Value(49) +Enum(mips_arch_opt_value) String(r24kf2_1) Value(50) EnumValue -Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical +Enum(mips_arch_opt_value) String(24kf) Value(51) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kfx) Value(50) +Enum(mips_arch_opt_value) String(r24kf) Value(51) EnumValue -Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical +Enum(mips_arch_opt_value) String(24kf1_1) Value(52) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kx) Value(51) +Enum(mips_arch_opt_value) String(r24kf1_1) Value(52) EnumValue -Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical +Enum(mips_arch_opt_value) String(24kfx) Value(53) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kec) Value(52) +Enum(mips_arch_opt_value) String(r24kfx) Value(53) EnumValue -Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical +Enum(mips_arch_opt_value) String(24kx) Value(54) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef2_1) Value(53) +Enum(mips_arch_opt_value) String(r24kx) Value(54) EnumValue -Enum(mips_arch_opt_value) 
String(24kef) Value(54) Canonical +Enum(mips_arch_opt_value) String(24kec) Value(55) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef) Value(54) +Enum(mips_arch_opt_value) String(r24kec) Value(55) EnumValue -Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical +Enum(mips_arch_opt_value) String(24kef2_1) Value(56) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kef1_1) Value(55) +Enum(mips_arch_opt_value) String(r24kef2_1) Value(56) EnumValue -Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical +Enum(mips_arch_opt_value) String(24kef) Value(57) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kefx) Value(56) +Enum(mips_arch_opt_value) String(r24kef) Value(57) EnumValue -Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical +Enum(mips_arch_opt_value) String(24kef1_1) Value(58) Canonical EnumValue -Enum(mips_arch_opt_value) String(r24kex) Value(57) +Enum(mips_arch_opt_value) String(r24kef1_1) Value(58) EnumValue -Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical +Enum(mips_arch_opt_value) String(24kefx) Value(59) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kc) Value(58) +Enum(mips_arch_opt_value) String(r24kefx) Value(59) EnumValue -Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical +Enum(mips_arch_opt_value) String(24kex) Value(60) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf2_1) Value(59) +Enum(mips_arch_opt_value) String(r24kex) Value(60) EnumValue -Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical +Enum(mips_arch_opt_value) String(34kc) Value(61) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf) Value(60) +Enum(mips_arch_opt_value) String(r34kc) Value(61) EnumValue -Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical +Enum(mips_arch_opt_value) String(34kf2_1) Value(62) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kf1_1) Value(61) +Enum(mips_arch_opt_value) String(r34kf2_1) Value(62) EnumValue -Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical +Enum(mips_arch_opt_value) String(34kf) Value(63) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kfx) Value(62) +Enum(mips_arch_opt_value) String(r34kf) Value(63) EnumValue -Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical +Enum(mips_arch_opt_value) String(34kf1_1) Value(64) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kx) Value(63) +Enum(mips_arch_opt_value) String(r34kf1_1) Value(64) EnumValue -Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical +Enum(mips_arch_opt_value) String(34kfx) Value(65) Canonical EnumValue -Enum(mips_arch_opt_value) String(r34kn) Value(64) +Enum(mips_arch_opt_value) String(r34kfx) Value(65) EnumValue -Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical +Enum(mips_arch_opt_value) String(34kx) Value(66) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kc) Value(65) +Enum(mips_arch_opt_value) String(r34kx) Value(66) EnumValue -Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical +Enum(mips_arch_opt_value) String(34kn) Value(67) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf2_1) Value(66) +Enum(mips_arch_opt_value) String(r34kn) Value(67) EnumValue -Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical +Enum(mips_arch_opt_value) String(74kc) Value(68) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf) Value(67) +Enum(mips_arch_opt_value) String(r74kc) Value(68) EnumValue -Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical +Enum(mips_arch_opt_value) String(74kf2_1) Value(69) 
Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf1_1) Value(68) +Enum(mips_arch_opt_value) String(r74kf2_1) Value(69) EnumValue -Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical +Enum(mips_arch_opt_value) String(74kf) Value(70) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kfx) Value(69) +Enum(mips_arch_opt_value) String(r74kf) Value(70) EnumValue -Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical +Enum(mips_arch_opt_value) String(74kf1_1) Value(71) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kx) Value(70) +Enum(mips_arch_opt_value) String(r74kf1_1) Value(71) EnumValue -Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical +Enum(mips_arch_opt_value) String(74kfx) Value(72) Canonical EnumValue -Enum(mips_arch_opt_value) String(r74kf3_2) Value(71) +Enum(mips_arch_opt_value) String(r74kfx) Value(72) EnumValue -Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical +Enum(mips_arch_opt_value) String(74kx) Value(73) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kc) Value(72) +Enum(mips_arch_opt_value) String(r74kx) Value(73) EnumValue -Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical +Enum(mips_arch_opt_value) String(74kf3_2) Value(74) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73) +Enum(mips_arch_opt_value) String(r74kf3_2) Value(74) EnumValue -Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical +Enum(mips_arch_opt_value) String(1004kc) Value(75) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf) Value(74) +Enum(mips_arch_opt_value) String(r1004kc) Value(75) EnumValue -Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical +Enum(mips_arch_opt_value) String(1004kf2_1) Value(76) Canonical EnumValue -Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75) +Enum(mips_arch_opt_value) String(r1004kf2_1) Value(76) EnumValue -Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical +Enum(mips_arch_opt_value) String(1004kf) Value(77) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kc) Value(76) +Enum(mips_arch_opt_value) String(r1004kf) Value(77) EnumValue -Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical +Enum(mips_arch_opt_value) String(1004kf1_1) Value(78) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kf) Value(77) +Enum(mips_arch_opt_value) String(r1004kf1_1) Value(78) EnumValue -Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical +Enum(mips_arch_opt_value) String(5kc) Value(79) Canonical EnumValue -Enum(mips_arch_opt_value) String(r20kc) Value(78) +Enum(mips_arch_opt_value) String(r5kc) Value(79) EnumValue -Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical +Enum(mips_arch_opt_value) String(5kf) Value(80) Canonical EnumValue -Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical +Enum(mips_arch_opt_value) String(r5kf) Value(80) EnumValue -Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical +Enum(mips_arch_opt_value) String(20kc) Value(81) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71k) Value(81) +Enum(mips_arch_opt_value) String(r20kc) Value(81) EnumValue -Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical +Enum(mips_arch_opt_value) String(sb1) Value(82) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical +Enum(mips_arch_opt_value) String(sb1a) Value(83) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical +Enum(mips_arch_opt_value) String(sr71000) Value(84) Canonical EnumValue -Enum(mips_arch_opt_value) 
String(octeon+) Value(85) Canonical +Enum(mips_arch_opt_value) String(sr71k) Value(84) EnumValue -Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical +Enum(mips_arch_opt_value) String(xlr) Value(85) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical +Enum(mips_arch_opt_value) String(loongson3a) Value(86) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon) Value(87) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon+) Value(88) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(octeon2) Value(89) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(xlp) Value(90) Canonical diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 1f2774638fc..bd1d10b0e4e 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "ggc.h" #include "gstab.h" -#include "hashtab.h" +#include "hash-table.h" #include "debug.h" #include "target.h" #include "target-def.h" @@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* R5900 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult_sf */ + COSTS_N_INSNS (256), /* fp_mult_df */ + COSTS_N_INSNS (8), /* fp_div_sf */ + COSTS_N_INSNS (256), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (256), /* int_mult_di */ + COSTS_N_INSNS (37), /* int_div_si */ + COSTS_N_INSNS (256), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* R7000 */ /* The only costs that are changed here are integer multiplication. */ @@ -1426,6 +1439,16 @@ mips_merge_decl_attributes (tree olddecl, tree newdecl) return merge_attributes (DECL_ATTRIBUTES (olddecl), DECL_ATTRIBUTES (newdecl)); } + +/* Implement TARGET_CAN_INLINE_P. */ + +static bool +mips_can_inline_p (tree caller, tree callee) +{ + if (mips_get_compress_mode (callee) != mips_get_compress_mode (caller)) + return false; + return default_target_can_inline_p (caller, callee); +} /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ @@ -12440,7 +12463,10 @@ mips_start_ll_sc_sync_block (void) if (!ISA_HAS_LL_SC) { output_asm_insn (".set\tpush", 0); - output_asm_insn (".set\tmips2", 0); + if (TARGET_64BIT) + output_asm_insn (".set\tmips3", 0); + else + output_asm_insn (".set\tmips2", 0); } } @@ -12995,6 +13021,7 @@ mips_issue_rate (void) case PROCESSOR_R4130: case PROCESSOR_R5400: case PROCESSOR_R5500: + case PROCESSOR_R5900: case PROCESSOR_R7000: case PROCESSOR_R9000: case PROCESSOR_OCTEON: @@ -15796,30 +15823,43 @@ mips_hash_base (rtx base) return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); } +/* Hashtable helpers. */ + +struct mips_lo_sum_offset_hasher : typed_free_remove <mips_lo_sum_offset> +{ + typedef mips_lo_sum_offset value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); +}; + /* Hash-table callbacks for mips_lo_sum_offsets. 
*/ -static hashval_t -mips_lo_sum_offset_hash (const void *entry) +inline hashval_t +mips_lo_sum_offset_hasher::hash (const value_type *entry) { - return mips_hash_base (((const struct mips_lo_sum_offset *) entry)->base); + return mips_hash_base (entry->base); } -static int -mips_lo_sum_offset_eq (const void *entry, const void *value) +inline bool +mips_lo_sum_offset_hasher::equal (const value_type *entry, + const compare_type *value) { - return rtx_equal_p (((const struct mips_lo_sum_offset *) entry)->base, - (const_rtx) value); + return rtx_equal_p (entry->base, value); } +typedef hash_table <mips_lo_sum_offset_hasher> mips_offset_table; + /* Look up symbolic constant X in HTAB, which is a hash table of mips_lo_sum_offsets. If OPTION is NO_INSERT, return true if X can be paired with a recorded LO_SUM, otherwise record X in the table. */ static bool -mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option) +mips_lo_sum_offset_lookup (mips_offset_table htab, rtx x, + enum insert_option option) { rtx base, offset; - void **slot; + mips_lo_sum_offset **slot; struct mips_lo_sum_offset *entry; /* Split X into a base and offset. */ @@ -15828,7 +15868,7 @@ mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option) base = UNSPEC_ADDRESS (base); /* Look up the base in the hash table. */ - slot = htab_find_slot_with_hash (htab, base, mips_hash_base (base), option); + slot = htab.find_slot_with_hash (base, mips_hash_base (base), option); if (slot == NULL) return false; @@ -15858,7 +15898,8 @@ static int mips_record_lo_sum (rtx *loc, void *data) { if (GET_CODE (*loc) == LO_SUM) - mips_lo_sum_offset_lookup ((htab_t) data, XEXP (*loc, 1), INSERT); + mips_lo_sum_offset_lookup (*(mips_offset_table*) data, + XEXP (*loc, 1), INSERT); return 0; } @@ -15867,7 +15908,7 @@ mips_record_lo_sum (rtx *loc, void *data) LO_SUMs in the current function. */ static bool -mips_orphaned_high_part_p (htab_t htab, rtx insn) +mips_orphaned_high_part_p (mips_offset_table htab, rtx insn) { enum mips_symbol_type type; rtx x, set; @@ -15975,7 +16016,7 @@ mips_reorg_process_insns (void) { rtx insn, last_insn, subinsn, next_insn, lo_reg, delayed_reg; int hilo_delay; - htab_t htab; + mips_offset_table htab; /* Force all instructions to be split into their final form. */ split_all_insns_noflow (); @@ -16001,8 +16042,9 @@ mips_reorg_process_insns (void) cfun->machine->all_noreorder_p = false; /* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder - because we rely on the assembler to work around some errata. */ - if (TARGET_FIX_VR4120 || TARGET_FIX_24K) + because we rely on the assembler to work around some errata. + The r5900 too has several bugs. */ + if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900) cfun->machine->all_noreorder_p = false; /* The same is true for -mfix-vr4130 if we might generate MFLO or @@ -16012,14 +16054,13 @@ mips_reorg_process_insns (void) if (TARGET_FIX_VR4130 && !ISA_HAS_MACCHI) cfun->machine->all_noreorder_p = false; - htab = htab_create (37, mips_lo_sum_offset_hash, - mips_lo_sum_offset_eq, free); + htab.create (37); /* Make a first pass over the instructions, recording all the LO_SUMs. 
*/ for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn)) FOR_EACH_SUBINSN (subinsn, insn) if (USEFUL_INSN_P (subinsn)) - for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, htab); + for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, &htab); last_insn = 0; hilo_delay = 2; @@ -16076,7 +16117,7 @@ mips_reorg_process_insns (void) } } - htab_delete (htab); + htab.dispose (); } /* Return true if the function has a long branch instruction. */ @@ -18600,6 +18641,8 @@ mips_expand_vec_minmax (rtx target, rtx op0, rtx op1, #define TARGET_INSERT_ATTRIBUTES mips_insert_attributes #undef TARGET_MERGE_DECL_ATTRIBUTES #define TARGET_MERGE_DECL_ATTRIBUTES mips_merge_decl_attributes +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P mips_can_inline_p #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION mips_set_current_function diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 50a030f7f2c..d775a8c940b 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -222,6 +222,7 @@ struct mips_cpu_info { #define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130) #define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400) #define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500) +#define TARGET_MIPS5900 (mips_arch == PROCESSOR_R5900) #define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000) #define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000) #define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON \ @@ -399,6 +400,9 @@ struct mips_cpu_info { if (TARGET_MCU) \ builtin_define ("__mips_mcu"); \ \ + if (TARGET_EVA) \ + builtin_define ("__mips_eva"); \ + \ if (TARGET_DSP) \ { \ builtin_define ("__mips_dsp"); \ @@ -614,39 +618,25 @@ struct mips_cpu_info { #endif #ifndef MULTILIB_ISA_DEFAULT -# if MIPS_ISA_DEFAULT == 1 -# define MULTILIB_ISA_DEFAULT "mips1" -# else -# if MIPS_ISA_DEFAULT == 2 -# define MULTILIB_ISA_DEFAULT "mips2" -# else -# if MIPS_ISA_DEFAULT == 3 -# define MULTILIB_ISA_DEFAULT "mips3" -# else -# if MIPS_ISA_DEFAULT == 4 -# define MULTILIB_ISA_DEFAULT "mips4" -# else -# if MIPS_ISA_DEFAULT == 32 -# define MULTILIB_ISA_DEFAULT "mips32" -# else -# if MIPS_ISA_DEFAULT == 33 -# define MULTILIB_ISA_DEFAULT "mips32r2" -# else -# if MIPS_ISA_DEFAULT == 64 -# define MULTILIB_ISA_DEFAULT "mips64" -# else -# if MIPS_ISA_DEFAULT == 65 -# define MULTILIB_ISA_DEFAULT "mips64r2" -# else -# define MULTILIB_ISA_DEFAULT "mips1" -# endif -# endif -# endif -# endif -# endif -# endif -# endif -# endif +#if MIPS_ISA_DEFAULT == 1 +#define MULTILIB_ISA_DEFAULT "mips1" +#elif MIPS_ISA_DEFAULT == 2 +#define MULTILIB_ISA_DEFAULT "mips2" +#elif MIPS_ISA_DEFAULT == 3 +#define MULTILIB_ISA_DEFAULT "mips3" +#elif MIPS_ISA_DEFAULT == 4 +#define MULTILIB_ISA_DEFAULT "mips4" +#elif MIPS_ISA_DEFAULT == 32 +#define MULTILIB_ISA_DEFAULT "mips32" +#elif MIPS_ISA_DEFAULT == 33 +#define MULTILIB_ISA_DEFAULT "mips32r2" +#elif MIPS_ISA_DEFAULT == 64 +#define MULTILIB_ISA_DEFAULT "mips64" +#elif MIPS_ISA_DEFAULT == 65 +#define MULTILIB_ISA_DEFAULT "mips64r2" +#else +#define MULTILIB_ISA_DEFAULT "mips1" +#endif #endif #ifndef MIPS_ABI_DEFAULT @@ -657,21 +647,13 @@ struct mips_cpu_info { #if MIPS_ABI_DEFAULT == ABI_32 #define MULTILIB_ABI_DEFAULT "mabi=32" -#endif - -#if MIPS_ABI_DEFAULT == ABI_O64 +#elif MIPS_ABI_DEFAULT == ABI_O64 #define MULTILIB_ABI_DEFAULT "mabi=o64" -#endif - -#if MIPS_ABI_DEFAULT == ABI_N32 +#elif MIPS_ABI_DEFAULT == ABI_N32 #define MULTILIB_ABI_DEFAULT "mabi=n32" -#endif - -#if MIPS_ABI_DEFAULT == ABI_64 +#elif MIPS_ABI_DEFAULT == ABI_64 #define MULTILIB_ABI_DEFAULT 
"mabi=64" -#endif - -#if MIPS_ABI_DEFAULT == ABI_EABI +#elif MIPS_ABI_DEFAULT == ABI_EABI #define MULTILIB_ABI_DEFAULT "mabi=eabi" #endif @@ -743,9 +725,9 @@ struct mips_cpu_info { #define MIPS_ISA_SYNCI_SPEC \ "%{msynci|mno-synci:;:%{mips32r2|mips64r2:-msynci;:-mno-synci}}" -#if MIPS_ABI_DEFAULT == ABI_O64 \ - || MIPS_ABI_DEFAULT == ABI_N32 \ - || MIPS_ABI_DEFAULT == ABI_64 +#if (MIPS_ABI_DEFAULT == ABI_O64 \ + || MIPS_ABI_DEFAULT == ABI_N32 \ + || MIPS_ABI_DEFAULT == ABI_64) #define OPT_ARCH64 "mabi=32|mgp32:;" #define OPT_ARCH32 "mabi=32|mgp32" #else @@ -781,7 +763,7 @@ struct mips_cpu_info { #define BASE_DRIVER_SELF_SPECS \ "%{!mno-dsp: \ %{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k*: -mdsp} \ - %{march=74k*:%{!mno-dspr2: -mdspr2 -mdsp}}}" + %{march=74k*|march=m14ke*: %{!mno-dspr2: -mdspr2 -mdsp}}}" #define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS @@ -825,6 +807,7 @@ struct mips_cpu_info { #define ISA_HAS_MUL3 ((TARGET_MIPS3900 \ || TARGET_MIPS5400 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_MIPS7000 \ || TARGET_MIPS9000 \ || TARGET_MAD \ @@ -839,6 +822,26 @@ struct mips_cpu_info { && TARGET_OCTEON \ && !TARGET_MIPS16) +/* ISA supports instructions DMULT and DMULTU. */ +#define ISA_HAS_DMULT (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions MULT and MULTU. + This is always true, but the macro is needed for ISA_HAS_<D>MULT + in mips.md. */ +#define ISA_HAS_MULT (1) + +/* ISA supports instructions DDIV and DDIVU. */ +#define ISA_HAS_DDIV (TARGET_64BIT && !TARGET_MIPS5900) + +/* ISA supports instructions DIV and DIVU. + This is always true, but the macro is needed for ISA_HAS_<D>DIV + in mips.md. */ +#define ISA_HAS_DIV (1) + +#define ISA_HAS_DIV3 ((TARGET_LOONGSON_2EF \ + || TARGET_LOONGSON_3A) \ + && !TARGET_MIPS16) + /* ISA has the floating-point conditional move instructions introduced in mips4. */ #define ISA_HAS_FP_CONDMOVE ((ISA_MIPS4 \ @@ -851,7 +854,9 @@ struct mips_cpu_info { /* ISA has the integer conditional move instructions introduced in mips4 and ST Loongson 2E/2F. */ -#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF) +#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE \ + || TARGET_MIPS5900 \ + || TARGET_LOONGSON_2EF) /* ISA has LDC1 and SDC1. */ #define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16) @@ -964,6 +969,7 @@ struct mips_cpu_info { /* ISA has data prefetch instructions. This controls use of 'pref'. */ #define ISA_HAS_PREFETCH ((ISA_MIPS4 \ || TARGET_LOONGSON_2EF \ + || TARGET_MIPS5900 \ || ISA_MIPS32 \ || ISA_MIPS32R2 \ || ISA_MIPS64 \ @@ -1025,15 +1031,18 @@ struct mips_cpu_info { and "addiu $4,$4,1". */ #define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \ && !TARGET_MIPS3900 \ + && !TARGET_MIPS5900 \ && !TARGET_MIPS16 \ && !TARGET_MICROMIPS) /* Likewise mtc1 and mfc1. */ #define ISA_HAS_XFER_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* Likewise floating-point comparisons. */ #define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \ + && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) /* True if mflo and mfhi can be immediately followed by instructions @@ -1053,6 +1062,7 @@ struct mips_cpu_info { || ISA_MIPS64 \ || ISA_MIPS64R2 \ || TARGET_MIPS5500 \ + || TARGET_MIPS5900 \ || TARGET_LOONGSON_2EF) /* ISA includes synci, jr.hb and jalr.hb. */ @@ -1070,7 +1080,7 @@ struct mips_cpu_info { /* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC instructions. 
*/ -#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16) +#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16) #define GENERATE_LL_SC \ (target_flags_explicit & MASK_LLSC \ ? TARGET_LLSC && !TARGET_MIPS16 \ @@ -1143,6 +1153,7 @@ struct mips_cpu_info { %{mdsp} %{mno-dsp} \ %{mdspr2} %{mno-dspr2} \ %{mmcu} %{mno-mcu} \ +%{meva} %{mno-eva} \ %{msmartmips} %{mno-smartmips} \ %{mmt} %{mno-mt} \ %{mfix-vr4120} %{mfix-vr4130} \ @@ -1361,8 +1372,8 @@ struct mips_cpu_info { #define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE #ifdef IN_LIBGCC2 -#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \ - || (defined _ABI64 && _MIPS_SIM == _ABI64) +#if ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \ + || (defined _ABI64 && _MIPS_SIM == _ABI64)) # define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 # else # define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 @@ -2868,9 +2879,8 @@ while (0) jal " USER_LABEL_PREFIX #FUNC "\n\ .set pop\n\ " TEXT_SECTION_ASM_OP); -#endif /* Switch to #elif when we're no longer limited by K&R C. */ -#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \ - || (defined _ABI64 && _MIPS_SIM == _ABI64) +#elif ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \ + || (defined _ABI64 && _MIPS_SIM == _ABI64)) #define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ asm (SECTION_OP "\n\ .set push\n\ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 7284e5f3384..b832dda27f0 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -55,6 +55,7 @@ r5000 r5400 r5500 + r5900 r7000 r8000 r9000 @@ -406,8 +407,12 @@ ;; Is this an extended instruction in mips16 mode? (define_attr "extended_mips16" "no,yes" - (if_then_else (ior (eq_attr "move_type" "sll0") - (eq_attr "jal" "direct")) + (if_then_else (ior ;; In general, constant-pool loads are extended + ;; instructions. We don't yet optimize for 16-bit + ;; PC-relative references. + (eq_attr "move_type" "sll0,loadpool") + (eq_attr "jal" "direct") + (eq_attr "got" "load")) (const_string "yes") (const_string "no"))) @@ -420,14 +425,89 @@ (match_test "TARGET_MICROMIPS"))) (const_string "yes") (const_string "no"))) - -;; Length of instruction in bytes. -(define_attr "length" "" - (cond [(and (eq_attr "extended_mips16" "yes") - (match_test "TARGET_MIPS16")) - (const_int 4) - (and (eq_attr "compression" "micromips,all") +;; The number of individual instructions that a non-branch pattern generates, +;; using units of BASE_INSN_LENGTH. +(define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") + (const_int 0) + + ;; Extended instructions count as 2. + (and (eq_attr "extended_mips16" "yes") + (match_test "TARGET_MIPS16")) + (const_int 2) + + ;; A GOT load followed by an add of $gp. This is not used for MIPS16. + (eq_attr "got" "xgot_high") + (const_int 2) + + ;; SHIFT_SHIFTs are decomposed into two separate instructions. + ;; They are extended instructions on MIPS16 targets. + (eq_attr "move_type" "shift_shift") + (if_then_else (match_test "TARGET_MIPS16") + (const_int 4) + (const_int 2)) + + ;; Check for doubleword moves that are decomposed into two + ;; instructions. The individual instructions are unextended + ;; MIPS16 ones. + (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") + (eq_attr "dword_mode" "yes")) + (const_int 2) + + ;; Constants, loads and stores are handled by external routines. 
+ (and (eq_attr "move_type" "const,constN") + (eq_attr "dword_mode" "yes")) + (symbol_ref "mips_split_const_insns (operands[1])") + (eq_attr "move_type" "const,constN") + (symbol_ref "mips_const_insns (operands[1])") + (eq_attr "move_type" "load,fpload") + (symbol_ref "mips_load_store_insns (operands[1], insn)") + (eq_attr "move_type" "store,fpstore") + (symbol_ref "mips_load_store_insns (operands[0], insn) + + (TARGET_FIX_24K ? 1 : 0)") + + ;; In the worst case, a call macro will take 8 instructions: + ;; + ;; lui $25,%call_hi(FOO) + ;; addu $25,$25,$28 + ;; lw $25,%call_lo(FOO)($25) + ;; nop + ;; jalr $25 + ;; nop + ;; lw $gp,X($sp) + ;; nop + (eq_attr "jal_macro" "yes") + (const_int 8) + + ;; Various VR4120 errata require a nop to be inserted after a macc + ;; instruction. The assembler does this for us, so account for + ;; the worst-case length here. + (and (eq_attr "type" "imadd") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + ;; VR4120 errata MD(4): if there are consecutive dmult instructions, + ;; the result of the second one is missed. The assembler should work + ;; around this by inserting a nop after the first dmult. + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "DI") + (match_test "TARGET_FIX_VR4120")) + (const_int 2) + + (eq_attr "type" "idiv,idiv3") + (symbol_ref "mips_idiv_insns ()") + + (not (eq_attr "sync_mem" "none")) + (symbol_ref "mips_sync_loop_insns (insn, operands)")] + (const_int 1))) + +;; Length of instruction in bytes. The default is derived from "insn_count", +;; but there are special cases for branches (which must be handled here) +;; and for compressed single instructions. +(define_attr "length" "" + (cond [(and (eq_attr "compression" "micromips,all") (eq_attr "dword_mode" "no") (match_test "TARGET_MICROMIPS")) (const_int 2) @@ -580,95 +660,8 @@ (const_int 20) (match_test "Pmode == SImode") (const_int 16) - ] (const_int 24)) - - ;; "Ghost" instructions occupy no space. - (eq_attr "type" "ghost") - (const_int 0) - - ;; GOT loads are extended MIPS16 instructions and 4-byte - ;; microMIPS instructions. - (eq_attr "got" "load") - (const_int 4) - - ;; A GOT load followed by an add of $gp. - (eq_attr "got" "xgot_high") - (const_int 8) - - ;; In general, constant-pool loads are extended instructions. - (eq_attr "move_type" "loadpool") - (const_int 4) - - ;; SHIFT_SHIFTs are decomposed into two separate instructions. - ;; They are extended instructions on MIPS16 targets. - (eq_attr "move_type" "shift_shift") - (const_int 8) - - ;; Check for doubleword moves that are decomposed into two - ;; instructions. The individual instructions are unextended - ;; MIPS16 ones or 2-byte microMIPS ones. - (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move") - (eq_attr "dword_mode" "yes")) - (if_then_else (match_test "TARGET_COMPRESSION") - (const_int 4) - (const_int 8)) - - ;; Doubleword CONST{,N} moves are split into two word - ;; CONST{,N} moves. - (and (eq_attr "move_type" "const,constN") - (eq_attr "dword_mode" "yes")) - (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH") - - ;; Otherwise, constants, loads and stores are handled by external - ;; routines. - (eq_attr "move_type" "const,constN") - (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH") - (eq_attr "move_type" "load,fpload") - (symbol_ref "mips_load_store_insns (operands[1], insn) - * BASE_INSN_LENGTH") - (eq_attr "move_type" "store,fpstore") - (symbol_ref "mips_load_store_insns (operands[0], insn) - * BASE_INSN_LENGTH - + (TARGET_FIX_24K ? 
NOP_INSN_LENGTH : 0)") - - ;; In the worst case, a call macro will take 8 instructions: - ;; - ;; lui $25,%call_hi(FOO) - ;; addu $25,$25,$28 - ;; lw $25,%call_lo(FOO)($25) - ;; nop - ;; jalr $25 - ;; nop - ;; lw $gp,X($sp) - ;; nop - (eq_attr "jal_macro" "yes") - (const_int 32) - - ;; Various VR4120 errata require a nop to be inserted after a macc - ;; instruction. The assembler does this for us, so account for - ;; the worst-case length here. - (and (eq_attr "type" "imadd") - (match_test "TARGET_FIX_VR4120")) - (const_int 8) - - ;; VR4120 errata MD(4): if there are consecutive dmult instructions, - ;; the result of the second one is missed. The assembler should work - ;; around this by inserting a nop after the first dmult. - (and (eq_attr "type" "imul,imul3") - (and (eq_attr "mode" "DI") - (match_test "TARGET_FIX_VR4120"))) - (const_int 8) - - (eq_attr "type" "idiv,idiv3") - (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH") - - (not (eq_attr "sync_mem" "none")) - (symbol_ref "mips_sync_loop_insns (insn, operands) - * BASE_INSN_LENGTH") - - (match_test "TARGET_MIPS16") - (const_int 2) - ] (const_int 4))) + ] (const_int 24))] + (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH"))) ;; Attribute describing the processor. (define_enum_attr "cpu" "processor" @@ -701,16 +694,11 @@ (const_string "hilo")] (const_string "none"))) -;; Is it a single instruction? -(define_attr "single_insn" "no,yes" - (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4) - ? SINGLE_INSN_YES : SINGLE_INSN_NO)")) - ;; Can the instruction be put into a delay slot? (define_attr "can_delay" "no,yes" (if_then_else (and (eq_attr "type" "!branch,call,jump") - (and (eq_attr "hazard" "none") - (eq_attr "single_insn" "yes"))) + (eq_attr "hazard" "none") + (match_test "get_attr_insn_count (insn) == 1")) (const_string "yes") (const_string "no"))) @@ -755,7 +743,9 @@ ;; This mode iterator allows :MOVECC to be used anywhere that a ;; conditional-move-type condition is needed. (define_mode_iterator MOVECC [SI (DI "TARGET_64BIT") - (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")]) + (CC "TARGET_HARD_FLOAT + && !TARGET_LOONGSON_2EF + && !TARGET_MIPS5900")]) ;; 32-bit integer moves for which we provide move patterns. 
(define_mode_iterator IMOVE32 @@ -1417,7 +1407,7 @@ "mul.<fmt>\t%0,%1,%2\;nop" [(set_attr "type" "fmul") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=f") @@ -1478,7 +1468,7 @@ [(set (match_operand:GPR 0 "register_operand") (mult:GPR (match_operand:GPR 1 "register_operand") (match_operand:GPR 2 "register_operand")))] - "" + "ISA_HAS_<D>MULT" { rtx lo; @@ -1524,7 +1514,7 @@ { if (which_alternative == 1) return "<d>mult\t%1,%2"; - if (<MODE>mode == SImode && TARGET_MIPS3900) + if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900)) return "mult\t%0,%1,%2"; return "<d>mul\t%0,%1,%2"; } @@ -1558,7 +1548,7 @@ [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d")))] - "!TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && !TARGET_FIX_R4000" "<d>mult\t%1,%2" [(set_attr "type" "imul") (set_attr "mode" "<MODE>")]) @@ -1568,11 +1558,11 @@ (mult:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (clobber (match_scratch:GPR 3 "=l"))] - "TARGET_FIX_R4000" + "ISA_HAS_<D>MULT && TARGET_FIX_R4000" "<d>mult\t%1,%2\;mflo\t%0" [(set_attr "type" "imul") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) ;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead ;; of "mult; mflo". They have the same latency, but the first form gives @@ -1632,7 +1622,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; The same idea applies here. The middle alternative needs one less ;; clobber than the final alternative, so we add "*?" as a counterweight. @@ -1651,7 +1641,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "3") (set_attr "mode" "SI") - (set_attr "length" "4,4,8")]) + (set_attr "insn_count" "1,1,2")]) ;; Split *mul_acc_si if both the source and destination accumulator ;; values are GPRs. @@ -1732,7 +1722,7 @@ "" [(set_attr "type" "imadd") (set_attr "accum_in" "1") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) ;; Patterns generated by the define_peephole2 below. @@ -1868,7 +1858,7 @@ [(set_attr "type" "imadd") (set_attr "accum_in" "1") (set_attr "mode" "SI") - (set_attr "length" "4,8")]) + (set_attr "insn_count" "1,2")]) ;; Split *mul_sub_si if both the source and destination accumulator ;; values are GPRs. 
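A note on the mips.c hunk earlier in this diff: the new mips_can_inline_p implementation of TARGET_CAN_INLINE_P refuses to inline whenever caller and callee would use different compression modes. A minimal, hedged illustration of the user-visible effect (the function names are invented for this sketch; "mips16"/"nomips16" are the documented MIPS function attributes):

/* Sketch only: with the new hook, GCC keeps this an out-of-line call,
   because the callee must be compiled as standard (nomips16) code while
   the caller is MIPS16 code.  */
static inline int __attribute__ ((nomips16))
uncompressed_helper (int x)
{
  return x + 1;
}

int __attribute__ ((mips16))
compressed_caller (int x)
{
  /* mips_can_inline_p compares the two compression modes and returns
     false, so the "inline" hint is ignored here.  */
  return uncompressed_helper (x);
}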
@@ -1949,7 +1939,7 @@ "mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) (define_insn_and_split "<u>mulsidi3_64bit" [(set (match_operand:DI 0 "register_operand" "=d") @@ -1968,10 +1958,10 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "ISA_HAS_EXT_INS") - (const_int 16) - (const_int 28)))]) + (const_int 4) + (const_int 7)))]) (define_expand "<u>mulsidi3_64bit_mips16" [(set (match_operand:DI 0 "register_operand") @@ -2035,7 +2025,7 @@ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) (sign_extend:DI (match_operand:SI 2 "register_operand" "d")))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT && ISA_HAS_DMUL3" + "ISA_HAS_DMUL3" "dmul\t%0,%1,%2" [(set_attr "type" "imul3") (set_attr "mode" "DI")]) @@ -2122,7 +2112,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>mulsi3_highpart_split" [(set (match_operand:SI 0 "register_operand") @@ -2189,7 +2179,7 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))) (const_int 64))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { if (TARGET_MIPS16) emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1], @@ -2208,7 +2198,7 @@ (any_extend:TI (match_operand:DI 2 "register_operand" "d"))) (const_int 64)))) (clobber (match_scratch:DI 3 "=l"))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_MIPS16 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; } @@ -2221,7 +2211,7 @@ } [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_expand "<su>muldi3_highpart_split" [(set (match_operand:DI 0 "register_operand") @@ -2244,7 +2234,7 @@ [(set (match_operand:TI 0 "register_operand") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) (any_extend:TI (match_operand:DI 2 "register_operand"))))] - "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" + "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" { rtx hilo; @@ -2266,7 +2256,7 @@ [(set (match_operand:TI 0 "muldiv_target_operand" "=x") (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))] - "TARGET_64BIT + "ISA_HAS_DMULT && !TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2" @@ -2278,13 +2268,13 @@ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d")) (any_extend:TI (match_operand:DI 2 "register_operand" "d")))) (clobber (match_scratch:TI 3 "=x"))] - "TARGET_64BIT + "ISA_HAS_DMULT && TARGET_FIX_R4000 && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)" "dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0" [(set_attr "type" "imul") (set_attr "mode" "DI") - (set_attr "length" "12")]) + (set_attr "insn_count" "3")]) ;; The R4650 supports a 32-bit multiply/ 64-bit accumulate ;; instruction. The HI/LO registers are used as a 64-bit accumulator. 
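Before the division hunks that follow: the reworked divmod<mode>4 and udivmod<mode>4 patterns below expand "/" and "%" to individual divmod operations and rely on CSE to merge duplicates, as their new comment explains. A small C illustration of that intent (illustrative only, not part of the patch):

/* Both results come from divmod patterns emitted at expand time; CSE is
   expected to fold them so that a single division instruction feeds both
   the quotient and the remainder.  */
int
quotient_and_remainder (int a, int b, int *rem)
{
  *rem = a % b;   /* first divmod */
  return a / b;   /* reuses the divmod above after CSE */
}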
@@ -2535,10 +2525,10 @@ } [(set_attr "type" "fdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*recip<mode>3" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2553,92 +2543,64 @@ } [(set_attr "type" "frdiv") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; VR4120 errata MD(A1): signed division instructions do not work correctly ;; with negative operands. We use special libgcc functions instead. -(define_expand "divmod<mode>4" - [(set (match_operand:GPR 0 "register_operand") - (div:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "register_operand"))) - (set (match_operand:GPR 3 "register_operand") - (mod:GPR (match_dup 1) - (match_dup 2)))] - "!TARGET_FIX_VR4120" -{ - if (TARGET_MIPS16) - { - emit_insn (gen_divmod<mode>4_split (operands[3], operands[1], - operands[2])); - emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); - } - else - emit_insn (gen_divmod<mode>4_internal (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -(define_insn_and_split "divmod<mode>4_internal" - [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") +;; +;; Expand generates divmod instructions for individual division and modulus +;; operations. We then rely on CSE to reuse earlier divmods where possible. +;; This means that, when generating MIPS16 code, it is better not to expose +;; the fixed LO register until after CSE has finished. However, it's still +;; better to split before register allocation, so that we don't allocate +;; one of the scarce MIPS16 registers to an unused result. +(define_insn_and_split "divmod<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=kl") (div:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (set (match_operand:GPR 3 "register_operand" "=d") (mod:GPR (match_dup 1) (match_dup 2)))] - "!TARGET_FIX_VR4120 && !TARGET_MIPS16" + "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120" "#" - "&& reload_completed" + "&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)" [(const_int 0)] { emit_insn (gen_divmod<mode>4_split (operands[3], operands[1], operands[2])); + if (TARGET_MIPS16) + emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); DONE; } [(set_attr "type" "idiv") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + ;; Worst case for MIPS16. + (set_attr "insn_count" "3")]) -(define_expand "udivmod<mode>4" - [(set (match_operand:GPR 0 "register_operand") - (udiv:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "register_operand"))) - (set (match_operand:GPR 3 "register_operand") - (umod:GPR (match_dup 1) - (match_dup 2)))] - "" -{ - if (TARGET_MIPS16) - { - emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1], - operands[2])); - emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); - } - else - emit_insn (gen_udivmod<mode>4_internal (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -(define_insn_and_split "udivmod<mode>4_internal" - [(set (match_operand:GPR 0 "muldiv_target_operand" "=l") +;; See the comment above "divmod<mode>4" for the MIPS16 handling. 
+(define_insn_and_split "udivmod<mode>4" + [(set (match_operand:GPR 0 "register_operand" "=kl") (udiv:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))) (set (match_operand:GPR 3 "register_operand" "=d") (umod:GPR (match_dup 1) (match_dup 2)))] - "!TARGET_MIPS16" + "ISA_HAS_<D>DIV" "#" - "reload_completed" + "(TARGET_MIPS16 && cse_not_expected) || reload_completed" [(const_int 0)] { emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1], operands[2])); + if (TARGET_MIPS16) + emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM)); DONE; } - [(set_attr "type" "idiv") - (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>") + ;; Worst case for MIPS16. + (set_attr "insn_count" "3")]) (define_expand "<u>divmod<mode>4_split" [(set (match_operand:GPR 0 "register_operand") @@ -2671,7 +2633,7 @@ [(any_div:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d"))] UNSPEC_SET_HILO))] - "" + "ISA_HAS_<GPR:D>DIV" { return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); } [(set_attr "type" "idiv") (set_attr "mode" "<GPR:MODE>")]) @@ -2698,10 +2660,10 @@ } [(set_attr "type" "fsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>a" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2716,10 +2678,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) (define_insn "*rsqrt<mode>b" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -2734,10 +2696,10 @@ } [(set_attr "type" "frsqrt") (set_attr "mode" "<UNITMODE>") - (set (attr "length") + (set (attr "insn_count") (if_then_else (match_test "TARGET_FIX_SB1") - (const_int 8) - (const_int 4)))]) + (const_int 2) + (const_int 1)))]) ;; ;; .................... @@ -3530,7 +3492,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "DF") (set_attr "cnv_mode" "D2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "register_operand") @@ -3567,7 +3529,7 @@ [(set_attr "type" "fcvt") (set_attr "mode" "SF") (set_attr "cnv_mode" "S2I") - (set_attr "length" "36")]) + (set_attr "insn_count" "9")]) (define_insn "fix_truncdfdi2" @@ -4045,7 +4007,7 @@ operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID); } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Use a scratch register to reduce the latency of the above pattern ;; on superscalar machines. The optimized sequence is: @@ -4100,7 +4062,7 @@ operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH); operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW); } - [(set_attr "length" "24")]) + [(set_attr "insn_count" "6")]) ;; Split HIGHs into: ;; @@ -5110,7 +5072,7 @@ return ".cprestore\t%1"; } [(set_attr "type" "store") - (set_attr "length" "4,12")]) + (set_attr "insn_count" "1,3")]) (define_insn "use_cprestore_<mode>" [(set (reg:P CPRESTORE_SLOT_REGNUM) @@ -5171,7 +5133,7 @@ "\tjr.hb\t$31\n" "\tnop%>%)"; } - [(set_attr "length" "20")]) + [(set_attr "insn_count" "5")]) ;; Cache operations for R4000-style caches. 
(define_insn "mips_cache" @@ -5364,8 +5326,7 @@ ;; not have and immediate). We recognize a shift of a load in order ;; to make it simple enough for combine to understand. ;; -;; The length here is the worst case: the length of the split version -;; will be more accurate. +;; The instruction count here is the worst case. (define_insn_and_split "" [(set (match_operand:SI 0 "register_operand" "=d") (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m") @@ -5378,7 +5339,8 @@ "" [(set_attr "type" "load") (set_attr "mode" "SI") - (set_attr "length" "8")]) + (set (attr "insn_count") + (symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))]) (define_insn "rotr<mode>3" [(set (match_operand:GPR 0 "register_operand" "=d") @@ -5986,7 +5948,7 @@ (clobber (reg:SI MIPS16_T_REGNUM))] "TARGET_MIPS16_SHORT_JUMP_TABLES" { - rtx diff_vec = PATTERN (next_real_insn (operands[2])); + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); @@ -6017,7 +5979,7 @@ return "j\t%4"; } - [(set_attr "length" "32")]) + [(set_attr "insn_count" "16")]) ;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well. ;; While it is possible to either pull it off the stack (in the @@ -6908,11 +6870,8 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "unknown") - ; Since rdhwr always generates a trap for now, putting it in a delay - ; slot would make the kernel's emulation of it much slower. - (set_attr "can_delay" "no") (set_attr "mode" "<MODE>") - (set_attr "length" "8")]) + (set_attr "insn_count" "2")]) (define_insn "*tls_get_tp_<mode>_split" [(set (reg:P TLS_GET_TP_REGNUM) @@ -6920,7 +6879,8 @@ "HAVE_AS_TLS && !TARGET_MIPS16" ".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop" [(set_attr "type" "unknown") - ; See tls_get_tp_<mode> + ; Since rdhwr always generates a trap for now, putting it in a delay + ; slot would make the kernel's emulation of it much slower. (set_attr "can_delay" "no") (set_attr "mode" "<MODE>")]) @@ -6952,7 +6912,7 @@ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))] "" [(set_attr "type" "multi") - (set_attr "length" "8") + (set_attr "insn_count" "4") (set_attr "mode" "<MODE>")]) (define_insn "*tls_get_tp_mips16_call_<mode>" @@ -6964,7 +6924,7 @@ "HAVE_AS_TLS && TARGET_MIPS16" { return MIPS_CALL ("jal", operands, 0, -1); } [(set_attr "type" "call") - (set_attr "length" "6") + (set_attr "insn_count" "3") (set_attr "mode" "<MODE>")]) ;; Named pattern for expanding thread pointer reference. diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index e11710db3c0..08ab29b1810 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -141,6 +141,10 @@ membedded-data Target Report Var(TARGET_EMBEDDED_DATA) Use ROM instead of RAM +meva +Target Report Var(TARGET_EVA) +Use Enhanced Virtual Addressing instructions + mexplicit-relocs Target Report Mask(EXPLICIT_RELOCS) Use NewABI-style %reloc() assembly operators diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h index a3fb48976bd..45bc0b88107 100644 --- a/gcc/config/mips/mti-linux.h +++ b/gcc/config/mips/mti-linux.h @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see /* This target is a multilib target, specify the sysroot paths. 
*/ #undef SYSROOT_SUFFIX_SPEC #define SYSROOT_SUFFIX_SPEC \ - "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" + "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}" #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ diff --git a/gcc/config/mips/n32-elf.h b/gcc/config/mips/n32-elf.h new file mode 100644 index 00000000000..0f41a6e9fc7 --- /dev/null +++ b/gcc/config/mips/n32-elf.h @@ -0,0 +1,35 @@ +/* Definitions of target machine for GNU compiler. + n32 for embedded systems. + Copyright (C) 2003-2013 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Use standard ELF-style local labels (not '$' as on early Irix). */ +#undef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." + +/* Use periods rather than dollar signs in special g++ assembler names. */ +#define NO_DOLLAR_IN_LABEL + +/* Force n32 to use 64-bit long doubles. */ +#undef LONG_DOUBLE_TYPE_SIZE +#define LONG_DOUBLE_TYPE_SIZE 64 + +#ifdef IN_LIBGCC2 +#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif diff --git a/gcc/config/mips/sde.h b/gcc/config/mips/sde.h index d42fee6309f..d35f79f25be 100644 --- a/gcc/config/mips/sde.h +++ b/gcc/config/mips/sde.h @@ -89,23 +89,6 @@ along with GCC; see the file COPYING3. If not see #undef PTRDIFF_TYPE #define PTRDIFF_TYPE "long int" -/* Use standard ELF-style local labels (not '$' as on early Irix). */ -#undef LOCAL_LABEL_PREFIX -#define LOCAL_LABEL_PREFIX "." - -/* Use periods rather than dollar signs in special g++ assembler names. */ -#define NO_DOLLAR_IN_LABEL - -/* Currently we don't support 128bit long doubles, so for now we force - n32 to be 64bit. */ -#undef LONG_DOUBLE_TYPE_SIZE -#define LONG_DOUBLE_TYPE_SIZE 64 - -#ifdef IN_LIBGCC2 -#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE -#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 -#endif - /* Force all .init and .fini entries to be 32-bit, not mips16, so that in a mixed environment they are all the same mode. The crti.asm and crtn.asm files will also be compiled as 32-bit due to the diff --git a/gcc/config/mips/t-mti-elf b/gcc/config/mips/t-mti-elf index 3f0868fb856..bce8f063452 100644 --- a/gcc/config/mips/t-mti-elf +++ b/gcc/config/mips/t-mti-elf @@ -16,20 +16,29 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 64 el sof -MULTILIB_MATCHES = EL=mel EB=meb +# The default build is mips32r2, hard-float big-endian. Add mips32, +# soft-float, and little-endian variations. -# We do not want to build mips16 versions of mips64* architectures. 
-MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof +MULTILIB_MATCHES = EL=mel EB=meb -# 64 bit ABI is not supported on mips32 architecture. +# The 64 bit ABI is not supported on the mips32 architecture. MULTILIB_EXCEPTIONS += *mips32*/*mabi=64* -# The 64 bit ABI is not supported on the mips32r2 bit architecture. -# Because mips32r2 is the default the exception list is a little messy. -# Basically we are saying any list that doesn't specify mips32, mips64, -# or mips64r2 but does specify mabi=64 is not allowed because that -# would be defaulting to the mips32r2 architecture. +# The 64 bit ABI is not supported on the mips32r2 architecture. +# Because mips32r2 is the default we can't use that flag to trigger +# the exception so we check for mabi=64 with no specific mips +# architecture flag instead. MULTILIB_EXCEPTIONS += mabi=64* -MULTILIB_EXCEPTIONS += mips16/mabi=64* + +# We do not want to build mips16 versions of mips64* architectures. +MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_EXCEPTIONS += *mips16/mabi=64* + +# We only want micromips for mips32r2 architecture and we do not want +# it used in conjunction with -mips16. +MULTILIB_EXCEPTIONS += *mips16/mmicromips* +MULTILIB_EXCEPTIONS += *mips64*/mmicromips* +MULTILIB_EXCEPTIONS += *mips32/mmicromips* +MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mips/t-mti-linux b/gcc/config/mips/t-mti-linux index 775a68d9dae..bce8f063452 100644 --- a/gcc/config/mips/t-mti-linux +++ b/gcc/config/mips/t-mti-linux @@ -19,8 +19,8 @@ # The default build is mips32r2, hard-float big-endian. Add mips32, # soft-float, and little-endian variations. -MULTILIB_OPTIONS = mips32/mips64/mips64r2 mabi=64 EL msoft-float -MULTILIB_DIRNAMES = mips32 mips64 mips64r2 64 el sof +MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float +MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof MULTILIB_MATCHES = EL=mel EB=meb # The 64 bit ABI is not supported on the mips32 architecture. @@ -28,6 +28,17 @@ MULTILIB_EXCEPTIONS += *mips32*/*mabi=64* # The 64 bit ABI is not supported on the mips32r2 architecture. # Because mips32r2 is the default we can't use that flag to trigger -# the exception so we check for mabi=64 with no specific mips flag -# instead. +# the exception so we check for mabi=64 with no specific mips +# architecture flag instead. MULTILIB_EXCEPTIONS += mabi=64* + +# We do not want to build mips16 versions of mips64* architectures. +MULTILIB_EXCEPTIONS += *mips64*/*mips16* +MULTILIB_EXCEPTIONS += *mips16/mabi=64* + +# We only want micromips for mips32r2 architecture and we do not want +# it used in conjunction with -mips16. +MULTILIB_EXCEPTIONS += *mips16/mmicromips* +MULTILIB_EXCEPTIONS += *mips64*/mmicromips* +MULTILIB_EXCEPTIONS += *mips32/mmicromips* +MULTILIB_EXCEPTIONS += *mmicromips/mabi=64* diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c index 1af09e559b0..bd37067dfc4 100644 --- a/gcc/config/mmix/mmix.c +++ b/gcc/config/mmix/mmix.c @@ -313,7 +313,7 @@ mmix_init_machine_status (void) return ggc_alloc_cleared_machine_function (); } -/* DATA_ALIGNMENT. +/* DATA_ABI_ALIGNMENT. We have trouble getting the address of stuff that is located at other than 32-bit alignments (GETA requirements), so try to give everything at least 32-bit alignment. 
*/ diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h index 4ca1a2b8c86..c5edc5777a9 100644 --- a/gcc/config/mmix/mmix.h +++ b/gcc/config/mmix/mmix.h @@ -164,7 +164,7 @@ struct GTY(()) machine_function /* Copied from elfos.h. */ #define MAX_OFILE_ALIGNMENT (32768 * 8) -#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ +#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \ mmix_data_alignment (TYPE, BASIC_ALIGN) #define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c index 2e18bebf3d8..c2ed7389bc4 100644 --- a/gcc/config/rl78/rl78.c +++ b/gcc/config/rl78/rl78.c @@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace) } } +/* Returns TRUE for valid addresses. */ +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode +static bool +rl78_valid_pointer_mode (enum machine_mode m) +{ + return (m == HImode || m == SImode); +} + /* Return the appropriate mode for a named address address. */ #undef TARGET_ADDR_SPACE_ADDRESS_MODE #define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode @@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) } + +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode + +static enum machine_mode +rl78_unwind_word_mode (void) +{ + return HImode; +} + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rl78.h" diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md index b3cfe6d1bbc..efc26210498 100644 --- a/gcc/config/rl78/rl78.md +++ b/gcc/config/rl78/rl78.md @@ -235,6 +235,24 @@ [(set_attr "valloc" "macax")] ) +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "general_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + ] + "" ; mulu supported by all targets + "" +) + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "general_operand" "") + (match_operand:HI 2 "nonmemory_operand" ""))) + ] + "! RL78_MUL_NONE" + "" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "register_operand" "=&v") (mult:SI (match_operand:SI 1 "nonmemory_operand" "vi") @@ -244,6 +262,58 @@ "" ) +(define_insn "*mulqi3_rl78" + [(set (match_operand:QI 0 "register_operand" "=&v") + (mult:QI (match_operand:QI 1 "general_operand" "+viU") + (match_operand:QI 2 "general_operand" "vi"))) + ] + "" ; mulu supported by all targets + "; mulqi macro %0 = %1 * %2 + mov a, %h1 + mov x, a + mov a, %h2 + mulu x ; ax = a * x + mov a, x + mov %h0, a + ; end of mulqi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_rl78" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_RL78" + "; mulhi macro %0 = %1 * %2 + movw ax, %h1 + movw bc, %h2 + mulhu ; bcax = bc * ax + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "macax")] +) + +(define_insn "*mulhi3_g13" + [(set (match_operand:HI 0 "register_operand" "=&v") + (mult:HI (match_operand:HI 1 "general_operand" "+viU") + (match_operand:HI 2 "general_operand" "vi"))) + ] + "RL78_MUL_G13" + "; mulhi macro %0 = %1 * %2 + mov a, #0x00 + mov !0xf00e8, a ; MDUC + movw ax, %h1 + movw 0xffff0, ax ; MDAL + movw ax, %h2 + movw 0xffff2, ax ; MDAH + nop ; mdb = mdal * mdah + movw ax, 0xffff6 ; MDBL + movw %h0, ax + ; end of mulhi macro" +;; [(set_attr "valloc" "umul")] +) + ;; 0xFFFF0 is MACR(L). 
0xFFFF2 is MACR(H) but we don't care about it ;; because we're only using the lower 16 bits (which is the upper 16 ;; bits of the result). diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index fd6d07f50ff..4b91c5c5e24 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -321,6 +321,42 @@ #define vec_vsx_st __builtin_vec_vsx_st #endif +#ifdef _ARCH_PWR8 +/* Vector additions added in ISA 2.07. */ +#define vec_eqv __builtin_vec_eqv +#define vec_nand __builtin_vec_nand +#define vec_orc __builtin_vec_orc +#define vec_vaddudm __builtin_vec_vaddudm +#define vec_vclz __builtin_vec_vclz +#define vec_vclzb __builtin_vec_vclzb +#define vec_vclzd __builtin_vec_vclzd +#define vec_vclzh __builtin_vec_vclzh +#define vec_vclzw __builtin_vec_vclzw +#define vec_vgbbd __builtin_vec_vgbbd +#define vec_vmaxsd __builtin_vec_vmaxsd +#define vec_vmaxud __builtin_vec_vmaxud +#define vec_vminsd __builtin_vec_vminsd +#define vec_vminud __builtin_vec_vminud +#define vec_vmrgew __builtin_vec_vmrgew +#define vec_vmrgow __builtin_vec_vmrgow +#define vec_vpksdss __builtin_vec_vpksdss +#define vec_vpksdus __builtin_vec_vpksdus +#define vec_vpkudum __builtin_vec_vpkudum +#define vec_vpkudus __builtin_vec_vpkudus +#define vec_vpopcnt __builtin_vec_vpopcnt +#define vec_vpopcntb __builtin_vec_vpopcntb +#define vec_vpopcntd __builtin_vec_vpopcntd +#define vec_vpopcnth __builtin_vec_vpopcnth +#define vec_vpopcntw __builtin_vec_vpopcntw +#define vec_vrld __builtin_vec_vrld +#define vec_vsld __builtin_vec_vsld +#define vec_vsrad __builtin_vec_vsrad +#define vec_vsrd __builtin_vec_vsrd +#define vec_vsubudm __builtin_vec_vsubudm +#define vec_vupkhsw __builtin_vec_vupkhsw +#define vec_vupklsw __builtin_vec_vupklsw +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1b0b5c3fb13..6607e450be3 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -41,15 +41,11 @@ UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH - UNSPEC_VPKUHUM - UNSPEC_VPKUWUM UNSPEC_VPKPX - UNSPEC_VPKSHSS - UNSPEC_VPKSWSS - UNSPEC_VPKUHUS - UNSPEC_VPKSHUS - UNSPEC_VPKUWUS - UNSPEC_VPKSWUS + UNSPEC_VPACK_SIGN_SIGN_SAT + UNSPEC_VPACK_SIGN_UNS_SAT + UNSPEC_VPACK_UNS_UNS_SAT + UNSPEC_VPACK_UNS_UNS_MOD UNSPEC_VSLV4SI UNSPEC_VSLO UNSPEC_VSR @@ -71,12 +67,10 @@ UNSPEC_VLOGEFP UNSPEC_VEXPTEFP UNSPEC_VLSDOI - UNSPEC_VUPKHSB + UNSPEC_VUNPACK_HI_SIGN + UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUPKHPX - UNSPEC_VUPKHSH - UNSPEC_VUPKLSB UNSPEC_VUPKLPX - UNSPEC_VUPKLSH UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -134,6 +128,7 @@ UNSPEC_VUPKLS_V4SF UNSPEC_VUPKHU_V4SF UNSPEC_VUPKLU_V4SF + UNSPEC_VGBBD ]) (define_c_enum "unspecv" @@ -146,6 +141,8 @@ ;; Vec int modes (define_mode_iterator VI [V4SI V8HI V16QI]) +;; Like VI, but add ISA 2.07 integer vector ops +(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) ;; Short vec in modes (define_mode_iterator VIshort [V8HI V16QI]) ;; Vec float modes @@ -159,8 +156,18 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) -(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")]) -(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) +(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) +(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") + (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) + +;; Vector pack/unpack +(define_mode_iterator VP [V2DI V4SI V8HI]) +(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")]) +(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) +(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) ;; Vector move instructions. 
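A rough usage sketch of the new ISA 2.07 intrinsic names that the altivec.h hunk above maps onto overloaded __builtin_vec_* builtins (vec_vclz, vec_vpopcnt, and friends). This is illustrative only, not part of the patch: it assumes the overloads resolve for vector unsigned int and that the code is built with -mcpu=power8 -maltivec, so that _ARCH_PWR8 is defined:

#include <altivec.h>

/* Per-element leading-zero count and population count on a V4SI vector.
   For this type vec_vclz is expected to expand to vclzw and vec_vpopcnt
   to vpopcntw; the exact overload table lives in rs6000-c.c, not in this
   hunk, so the prototypes here are assumptions.  */
vector unsigned int
clz_then_popcount (vector unsigned int v, vector unsigned int *lz)
{
  *lz = vec_vclz (v);
  return vec_vpopcnt (v);
}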
(define_insn "*altivec_mov<mode>" @@ -378,10 +385,10 @@ ;; add (define_insn "add<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (plus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (plus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vaddu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -398,17 +405,17 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VADDCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vaddcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "altivec_vaddu<VI_char>s" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")] + (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "<VI_unit>" "vaddu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -418,16 +425,16 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VADDS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vadds<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) ;; sub (define_insn "sub<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (minus:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (minus:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsubu<VI_char>m %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -444,7 +451,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") (match_operand:V4SI 2 "register_operand" "v")] UNSPEC_VSUBCUW))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vsubcuw %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -454,7 +461,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBU)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubu<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -464,7 +471,7 @@ (match_operand:VI 2 "register_operand" "v")] UNSPEC_VSUBS)) (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vsubs<VI_char>s %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -483,7 +490,7 @@ (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] UNSPEC_VAVGS))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" "vavgs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -492,31 +499,31 @@ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")] UNSPEC_VCMPBFP))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SImode)" "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_eq<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 
"altivec_register_operand" "v")))] + "<VI_unit>" "vcmpequ<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gt<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgts<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_gtu<mode>" - [(set (match_operand:VI 0 "altivec_register_operand" "=v") - (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") - (match_operand:VI 2 "altivec_register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "altivec_register_operand" "=v") + (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v") + (match_operand:VI2 2 "altivec_register_operand" "v")))] + "<VI_unit>" "vcmpgtu<VI_char> %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -744,18 +751,18 @@ ;; max (define_insn "umax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smax<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmaxs<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -768,18 +775,18 @@ [(set_attr "type" "veccmp")]) (define_insn "umin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (umin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (umin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vminu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "smin<mode>3" - [(set (match_operand:VI 0 "register_operand" "=v") - (smin:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (smin:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vmins<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -935,6 +942,31 @@ "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Power8 vector merge even/odd +(define_insn "p8_vmrgew" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 6)])))] + "TARGET_P8_VECTOR" + "vmrgew %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_vmrgow" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + 
(match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 1) (const_int 5) + (const_int 3) (const_int 7)])))] + "TARGET_P8_VECTOR" + "vmrgow %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_insn "vec_widen_umult_even_v16qi" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v") @@ -1011,10 +1043,13 @@ ;; logical ops. Have the logical ops follow the memory ops in ;; terms of whether to prefer VSX or Altivec +;; AND has a clobber to be consistant with VSX, which adds splitters for using +;; the GPR registers. (define_insn "*altivec_and<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") (and:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v")))] + (match_operand:VM 2 "register_operand" "v"))) + (clobber (match_scratch:CC 3 "=X"))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vand %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1044,8 +1079,8 @@ (define_insn "*altivec_nor<mode>3" [(set (match_operand:VM 0 "register_operand" "=v") - (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v"))))] + (and:VM (not:VM (match_operand:VM 1 "register_operand" "v")) + (not:VM (match_operand:VM 2 "register_operand" "v"))))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode)" "vnor %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1058,24 +1093,6 @@ "vandc %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vpkuhum" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuwum" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vpkpx" [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") @@ -1085,71 +1102,47 @@ "vpkpx %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshss" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshss %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkswss" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWSS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswss %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vpkuhus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuhus %0,%1,%2" +(define_insn "altivec_vpks<VI_char>ss" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_SIGN_SAT))] + "<VI_unit>" + 
"vpks<VI_char>ss %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkshus" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKSHUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkshus %0,%1,%2" +(define_insn "altivec_vpks<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_SIGN_UNS_SAT))] + "<VI_unit>" + "vpks<VI_char>us %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkuwus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkuwus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>us" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_SAT))] + "<VI_unit>" + "vpku<VI_char>us %0,%1,%2" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vpkswus" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKSWUS)) - (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))] - "TARGET_ALTIVEC" - "vpkswus %0,%1,%2" +(define_insn "altivec_vpku<VI_char>um" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "vpku<VI_char>um %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vrl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (rotate:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vrl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1172,26 +1165,26 @@ [(set_attr "type" "vecperm")]) (define_insn "*altivec_vsl<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashift:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsl<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsr<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (lshiftrt:VI (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsr<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "*altivec_vsra<VI_char>" - [(set (match_operand:VI 0 "register_operand" "=v") - (ashiftrt:VI (match_operand:VI 1 "register_operand" "v") - 
(match_operand:VI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v")))] + "<VI_unit>" "vsra<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) @@ -1476,12 +1469,20 @@ "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - "vupkhsb %0,%1" +(define_insn "altivec_vupkhs<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" + "vupkhs<VU_char> %0,%1" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vupkls<VU_char>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" + "vupkls<VU_char> %0,%1" [(set_attr "type" "vecperm")]) (define_insn "altivec_vupkhpx" @@ -1492,22 +1493,6 @@ "vupkhpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupkhsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - "vupkhsh %0,%1" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vupklsb" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - "vupklsb %0,%1" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] @@ -1516,49 +1501,41 @@ "vupklpx %0,%1" [(set_attr "type" "vecperm")]) -(define_insn "altivec_vupklsh" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - "vupklsh %0,%1" - [(set_attr "type" "vecperm")]) - ;; Compare vectors producing a vector result and a predicate, setting CR6 to ;; indicate a combined status (define_insn "*altivec_vcmpequ<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (eq:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (eq:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpequ<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgts<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gt:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gt:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgts<VI_char>. 
%0,%1,%2" [(set_attr "type" "veccmp")]) (define_insn "*altivec_vcmpgtu<VI_char>_p" [(set (reg:CC 74) - (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v") - (match_operand:VI 2 "register_operand" "v"))] + (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v") + (match_operand:VI2 2 "register_operand" "v"))] UNSPEC_PREDICATE)) - (set (match_operand:VI 0 "register_operand" "=v") - (gtu:VI (match_dup 1) - (match_dup 2)))] - "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)" + (set (match_operand:VI2 0 "register_operand" "=v") + (gtu:VI2 (match_dup 1) + (match_dup 2)))] + "<VI_unit>" "vcmpgtu<VI_char>. %0,%1,%2" [(set_attr "type" "veccmp")]) @@ -1779,20 +1756,28 @@ [(set_attr "type" "vecstore")]) ;; Generate -;; vspltis? SCRATCH0,0 +;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 ;; vmaxs? %0,%1,SCRATCH2" (define_expand "abs<mode>2" - [(set (match_dup 2) (vec_duplicate:VI (const_int 0))) - (set (match_dup 3) - (minus:VI (match_dup 2) - (match_operand:VI 1 "register_operand" "v"))) - (set (match_operand:VI 0 "register_operand" "=v") - (smax:VI (match_dup 1) (match_dup 3)))] - "TARGET_ALTIVEC" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) + (minus:VI2 (match_dup 2) + (match_operand:VI2 1 "register_operand" "v"))) + (set (match_operand:VI2 0 "register_operand" "=v") + (smax:VI2 (match_dup 1) (match_dup 4)))] + "<VI_unit>" { - operands[2] = gen_reg_rtx (GET_MODE (operands[0])); - operands[3] = gen_reg_rtx (GET_MODE (operands[0])); + int i, n_elt = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create an all 0 constant. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = const0_rtx; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v); + operands[4] = gen_reg_rtx (<MODE>mode); }) ;; Generate @@ -1950,49 +1935,19 @@ DONE; }") -(define_expand "vec_unpacks_hi_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKHSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsb (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_hi_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKHSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupkhsh (operands[0], operands[1])); - DONE; -}") - -(define_expand "vec_unpacks_lo_v16qi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] - UNSPEC_VUPKLSB))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsb (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_hi_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_HI_SIGN))] + "<VI_unit>" + "") -(define_expand "vec_unpacks_lo_v8hi" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] - UNSPEC_VUPKLSH))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vupklsh (operands[0], operands[1])); - DONE; -}") +(define_expand "vec_unpacks_lo_<VP_small_lc>" + [(set (match_operand:VP 0 "register_operand" "=v") + (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")] + UNSPEC_VUNPACK_LO_SIGN))] + "<VI_unit>" + "") (define_insn "vperm_v8hiv4si" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2291,29 +2246,13 @@ DONE; }") -(define_expand 
"vec_pack_trunc_v8hi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_pack_trunc_v4si" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2])); - DONE; -}") +(define_expand "vec_pack_trunc_<mode>" + [(set (match_operand:<VP_small> 0 "register_operand" "=v") + (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v") + (match_operand:VP 2 "register_operand" "v")] + UNSPEC_VPACK_UNS_UNS_MOD))] + "<VI_unit>" + "") (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) @@ -2460,3 +2399,34 @@ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx)); DONE; }") + + +;; Power8 vector instructions encoded as Altivec instructions + +;; Vector count leading zeros +(define_insn "*p8v_clz<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vclz<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector population count +(define_insn "*p8v_popcount<mode>2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P8_VECTOR" + "vpopcnt<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +;; Vector Gather Bits by Bytes by Doubleword +(define_insn "p8v_vgbbd" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] + UNSPEC_VGBBD))] + "TARGET_P8_VECTOR" + "vgbbd %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 463d69c6ba4..fa53cbb9de7 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -79,12 +79,31 @@ (define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]" "Floating point register if the LFIWAX instruction is enabled or NO_REGS.") +(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]" + "VSX register if direct move instructions are enabled, or NO_REGS.") + +(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]" + "General purpose register if 64-bit instructions are enabled or NO_REGS.") + +(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]" + "Altivec register if -mpower8-vector is used or NO_REGS.") + (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" "Floating point register if the STFIWX instruction is enabled or NO_REGS.") (define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]" "Floating point register if the LFIWZX instruction is enabled or NO_REGS.") +;; NO_REGs register constraint, used to merge mov{sd,sf}, since movsd can use +;; direct move directly, and movsf can't to move between the register sets. 
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode +(define_register_constraint "wn" "NO_REGS") + +;; Lq/stq validates the address for load/store quad +(define_memory_constraint "wQ" + "Memory operand suitable for the load/store quad instructions" + (match_operand 0 "quad_memory_operand")) + ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" "Indexed or indirect memory operand, ignoring the bottom 4 bits" diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md new file mode 100644 index 00000000000..9f7e4a1b255 --- /dev/null +++ b/gcc/config/rs6000/crypto.md @@ -0,0 +1,101 @@ +;; Cryptographic instructions added in ISA 2.07 +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_c_enum "unspec" + [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST + UNSPEC_VSBOX + UNSPEC_VSHASIGMA + UNSPEC_VPERMXOR + UNSPEC_VPMSUM]) + +;; Iterator for VPMSUM/VPERMXOR +(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI]) + +(define_mode_attr CR_char [(V16QI "b") + (V8HI "h") + (V4SI "w") + (V2DI "d")]) + +;; Iterator for VSHASIGMAD/VSHASIGMAW +(define_mode_iterator CR_hash [V4SI V2DI]) + +;; Iterator for the other crypto functions +(define_int_iterator CR_code [UNSPEC_VCIPHER + UNSPEC_VNCIPHER + UNSPEC_VCIPHERLAST + UNSPEC_VNCIPHERLAST]) + +(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher") + (UNSPEC_VNCIPHER "vncipher") + (UNSPEC_VCIPHERLAST "vcipherlast") + (UNSPEC_VNCIPHERLAST "vncipherlast")]) + +;; 2 operand crypto instructions +(define_insn "crypto_<CR_insn>" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v")] + CR_code))] + "TARGET_CRYPTO" + "<CR_insn> %0,%1,%2" + [(set_attr "type" "crypto")]) + +(define_insn "crypto_vpmsum<CR_char>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v")] + UNSPEC_VPMSUM))] + "TARGET_CRYPTO" + "vpmsum<CR_char> %0,%1,%2" + [(set_attr "type" "crypto")]) + +;; 3 operand crypto instructions +(define_insn "crypto_vpermxor_<mode>" + [(set (match_operand:CR_mode 0 "register_operand" "=v") + (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") + (match_operand:CR_mode 2 "register_operand" "v") + (match_operand:CR_mode 3 "register_operand" "v")] + UNSPEC_VPERMXOR))] + "TARGET_CRYPTO" + "vpermxor %0,%1,%2,%3" + [(set_attr "type" "crypto")]) + +;; 1 operand crypto instruction +(define_insn "crypto_vsbox" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")] + UNSPEC_VSBOX))] + "TARGET_CRYPTO" + "vsbox %0,%1" + 
[(set_attr "type" "crypto")]) + +;; Hash crypto instructions +(define_insn "crypto_vshasigma<CR_char>" + [(set (match_operand:CR_hash 0 "register_operand" "=v") + (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_1_operand" "n") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPEC_VSHASIGMA))] + "TARGET_CRYPTO" + "vshasigma<CR_char> %0,%1,%2,%3" + [(set_attr "type" "crypto")]) diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c index e608dce184c..1a173d0b1cc 100644 --- a/gcc/config/rs6000/driver-rs6000.c +++ b/gcc/config/rs6000/driver-rs6000.c @@ -167,7 +167,7 @@ elf_platform (void) if (fd != -1) { - char buf[1024]; + static char buf[1024]; ElfW(auxv_t) *av; ssize_t n; diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 3f280581feb..79f0f0b5f00 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -136,8 +136,11 @@ extern int dot_symbols; SET_CMODEL (CMODEL_MEDIUM); \ if (rs6000_current_cmodel != CMODEL_SMALL) \ { \ - TARGET_NO_FP_IN_TOC = 0; \ - TARGET_NO_SUM_IN_TOC = 0; \ + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ + TARGET_NO_FP_IN_TOC \ + = rs6000_current_cmodel == CMODEL_MEDIUM; \ + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ + TARGET_NO_SUM_IN_TOC = 0; \ } \ } \ } \ diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md new file mode 100644 index 00000000000..83bf7197483 --- /dev/null +++ b/gcc/config/rs6000/power8.md @@ -0,0 +1,373 @@ +;; Scheduling description for IBM POWER8 processor. +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "power8fxu,power8lsu,power8vsu,power8misc") + +(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu") +(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu") +(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu") +(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu") +(define_cpu_unit "bpu_power8,cru_power8" "power8misc") +(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8" "power8misc") + + +; Dispatch group reservations +(define_reservation "DU_any_power8" + "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\ + du5_power8") + +; 2-way Cracked instructions go in slots 0-1 +; (can also have a second in slots 3-4 if insns are adjacent) +(define_reservation "DU_cracked_power8" + "du0_power8+du1_power8") + +; Insns that are first in group +(define_reservation "DU_first_power8" + "du0_power8") + +; Insns that are first and last in group +(define_reservation "DU_both_power8" + "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\ + du5_power8+du6_power8") + +; Dispatch slots are allocated in order conforming to program order. 
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\ + du5_power8,du6_power8") +(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\ + du6_power8") +(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8") +(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8") +(absence_set "du4_power8" "du5_power8,du6_power8") +(absence_set "du5_power8" "du6_power8") + + +; Execution unit reservations +(define_reservation "FXU_power8" + "fxu0_power8|fxu1_power8") + +(define_reservation "LU_power8" + "lu0_power8|lu1_power8") + +(define_reservation "LSU_power8" + "lsu0_power8|lsu1_power8") + +(define_reservation "LU_or_LSU_power8" + "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8") + +(define_reservation "VSU_power8" + "vsu0_power8|vsu1_power8") + + +; LS Unit +(define_insn_reservation "power8-load" 3 + (and (eq_attr "type" "load") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-load-update" 3 + (and (eq_attr "type" "load_u,load_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8+FXU_power8") + +(define_insn_reservation "power8-load-ext" 3 + (and (eq_attr "type" "load_ext") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_or_LSU_power8,FXU_power8") + +(define_insn_reservation "power8-load-ext-update" 3 + (and (eq_attr "type" "load_ext_u,load_ext_ux") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8") + +(define_insn_reservation "power8-fpload" 5 + (and (eq_attr "type" "fpload,vecload") + (eq_attr "cpu" "power8")) + "DU_any_power8,LU_power8") + +(define_insn_reservation "power8-fpload-update" 5 + (and (eq_attr "type" "fpload_u,fpload_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LU_power8+FXU_power8") + +(define_insn_reservation "power8-store" 5 ; store-forwarding latency + (and (eq_attr "type" "store,store_u") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-store-update-indexed" 5 + (and (eq_attr "type" "store_ux") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-fpstore" 5 + (and (eq_attr "type" "fpstore") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-fpstore-update" 5 + (and (eq_attr "type" "fpstore_u,fpstore_ux") + (eq_attr "cpu" "power8")) + "DU_any_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-vecstore" 5 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,LSU_power8+VSU_power8") + +(define_insn_reservation "power8-larx" 3 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power8")) + "DU_both_power8,LU_or_LSU_power8") + +(define_insn_reservation "power8-stcx" 10 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8+LU_power8") + +(define_insn_reservation "power8-sync" 1 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power8")) + "DU_both_power8,LSU_power8") + + +; FX Unit +(define_insn_reservation "power8-1cyc" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\ + var_shift_rotate,exts,isel") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 2 "power8-1cyc" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") +; "power8-load,power8-load-update,power8-load-ext,\ +; power8-load-ext-update,power8-fpload,power8-fpload-update,\ +; 
power8-store,power8-store-update,power8-store-update-indexed,\ +; power8-fpstore,power8-fpstore-update,power8-vecstore,\ +; power8-larx,power8-stcx") + +(define_insn_reservation "power8-2cyc" 2 + (and (eq_attr "type" "cntlz,popcnt") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8") + +(define_insn_reservation "power8-three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power8")) + "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8") + +; cmp - Normal compare insns +(define_insn_reservation "power8-cmp" 2 + (and (eq_attr "type" "cmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; fast_compare : add./and./nor./etc +(define_insn_reservation "power8-fast-compare" 2 + (and (eq_attr "type" "fast_compare") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +; compare : rldicl./exts./etc +; delayed_compare : rlwinm./slwi./etc +; var_delayed_compare : rlwnm./slw./etc +(define_insn_reservation "power8-compare" 2 + (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 3 "power8-fast-compare,power8-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 5 cycle CR latency +(define_bypass 5 "power8-fast-compare,power8-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +(define_insn_reservation "power8-mul" 4 + (and (eq_attr "type" "imul,imul2,imul3,lmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,FXU_power8") + +(define_insn_reservation "power8-mul-compare" 4 + (and (eq_attr "type" "imul_compare,lmul_compare") + (eq_attr "cpu" "power8")) + "DU_cracked_power8,FXU_power8") + +; Extra cycle to LU/LSU +(define_bypass 5 "power8-mul,power8-mul-compare" + "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\ + power8-vecstore,power8-larx,power8-stcx") + +; 7 cycle CR latency +(define_bypass 7 "power8-mul,power8-mul-compare" + "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch") + +; FXU divides are not pipelined +(define_insn_reservation "power8-idiv" 37 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*37|fxu1_power8*37") + +(define_insn_reservation "power8-ldiv" 68 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,fxu0_power8*68|fxu1_power8*68") + +(define_insn_reservation "power8-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,FXU_power8") + +; Should differentiate between 1 cr field and > 1 since mtocrf is not microcode +(define_insn_reservation "power8-mtcr" 3 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,FXU_power8") + + +; CR Unit +(define_insn_reservation "power8-mfjmpr" 5 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8+FXU_power8") + +(define_insn_reservation "power8-crlogical" 3 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + +(define_insn_reservation "power8-mfcr" 5 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power8")) + "DU_both_power8,cru_power8") + +(define_insn_reservation "power8-mfcrf" 3 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power8")) + "DU_first_power8,cru_power8") + + +; BR Unit +; Branches 
take dispatch slot 7, but reserve any remaining prior slots to +; prevent other insns from grabbing them once this is assigned. +(define_insn_reservation "power8-branch" 3 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power8")) + "(du6_power8\ + |du5_power8+du6_power8\ + |du4_power8+du5_power8+du6_power8\ + |du3_power8+du4_power8+du5_power8+du6_power8\ + |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\ + |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\ + du6_power8),bpu_power8") + +; Branch updating LR/CTR feeding mf[lr|ctr] +(define_bypass 4 "power8-branch" "power8-mfjmpr") + + +; VS Unit (includes FP/VSX/VMX/DFP/Crypto) +(define_insn_reservation "power8-fp" 6 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +; Additional 3 cycles for any CR result +(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch") + +(define_insn_reservation "power8-fpcompare" 8 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sdiv" 27 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-sqrt" 32 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-dsqrt" 44 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecsimple" 2 + (and (eq_attr "type" "vecperm,vecsimple,veccmp") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecnormal" 6 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_bypass 7 "power8-vecnormal" + "power8-vecsimple,power8-veccomplex,power8-fpstore*,\ + power8-vecstore") + +(define_insn_reservation "power8-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecfdiv" 25 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-vecdiv" 31 + (and (eq_attr "type" "vecdiv") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mffgpr" 5 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-mftgpr" 6 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + +(define_insn_reservation "power8-crypto" 7 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power8")) + "DU_any_power8,VSU_power8") + diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 78ec1b20913..f47967a48aa 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -166,6 +166,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) +;; Match op = 0..15 +(define_predicate "const_0_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + ;; Return 1 if op is a register that is not special. 
(define_predicate "gpc_reg_operand" (match_operand 0 "register_operand") @@ -182,9 +187,68 @@ if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op))) return 1; + if (TARGET_VSX && VSX_REGNO_P (REGNO (op))) + return 1; + return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't +;; allow floating point or vector registers. +(define_predicate "int_reg_operand" + (match_operand 0 "register_operand") +{ + if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode)) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + return INT_REGNO_P (REGNO (op)); +}) + +;; Like int_reg_operand, but only return true for base registers +(define_predicate "base_reg_operand" + (match_operand 0 "int_reg_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + return (REGNO (op) != FIRST_GPR_REGNO); +}) + +;; Return 1 if op is a general purpose register that is an even register +;; which suitable for a load/store quad operation +(define_predicate "quad_int_reg_operand" + (match_operand 0 "register_operand") +{ + HOST_WIDE_INT r; + + if (!TARGET_QUAD_MEMORY) + return 0; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (!REG_P (op)) + return 0; + + r = REGNO (op); + if (r >= FIRST_PSEUDO_REGISTER) + return 1; + + return (INT_REGNO_P (r) && ((r & 1) == 0)); +}) + ;; Return 1 if op is a register that is a condition register field. (define_predicate "cc_reg_operand" (match_operand 0 "register_operand") @@ -302,6 +366,11 @@ & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)") (match_operand 0 "gpc_reg_operand"))) +;; Like reg_or_logical_cint_operand, but allow vsx registers +(define_predicate "vsx_reg_or_cint_operand" + (ior (match_operand 0 "vsx_register_operand") + (match_operand 0 "reg_or_logical_cint_operand"))) + ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register ;; with no more than one instruction per word. (define_predicate "easy_fp_constant" @@ -458,9 +527,11 @@ (match_test "easy_altivec_constant (op, mode)"))) { HOST_WIDE_INT val; + int elt; if (mode == V2DImode || mode == V2DFmode) return 0; - val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1); + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); val = ((val & 0xff) ^ 0x80) - 0x80; return EASY_VECTOR_15_ADD_SELF (val); }) @@ -472,9 +543,11 @@ (match_test "easy_altivec_constant (op, mode)"))) { HOST_WIDE_INT val; + int elt; if (mode == V2DImode || mode == V2DFmode) return 0; - val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1); + elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0; + val = const_vector_elt_as_int (op, elt); return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); }) @@ -507,6 +580,54 @@ (and (match_operand 0 "memory_operand") (match_test "offsettable_nonstrict_memref_p (op)"))) +;; Return 1 if the operand is suitable for load/store quad memory. 
+(define_predicate "quad_memory_operand" + (match_code "mem") +{ + rtx addr, op0, op1; + int ret; + + if (!TARGET_QUAD_MEMORY) + ret = 0; + + else if (!memory_operand (op, mode)) + ret = 0; + + else if (GET_MODE_SIZE (GET_MODE (op)) != 16) + ret = 0; + + else if (MEM_ALIGN (op) < 128) + ret = 0; + + else + { + addr = XEXP (op, 0); + if (int_reg_operand (addr, Pmode)) + ret = 1; + + else if (GET_CODE (addr) != PLUS) + ret = 0; + + else + { + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + ret = (int_reg_operand (op0, Pmode) + && GET_CODE (op1) == CONST_INT + && IN_RANGE (INTVAL (op1), -32768, 32767) + && (INTVAL (op1) & 15) == 0); + } + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); + debug_rtx (op); + } + + return ret; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand. (define_predicate "indexed_or_indirect_operand" (match_code "mem") @@ -521,6 +642,19 @@ return indexed_or_indirect_address (op, mode); }) +;; Like indexed_or_indirect_operand, but also allow a GPR register if direct +;; moves are supported. +(define_predicate "reg_or_indexed_operand" + (match_code "mem,reg") +{ + if (MEM_P (op)) + return indexed_or_indirect_operand (op, mode); + else if (TARGET_DIRECT_MOVE) + return register_operand (op, mode); + return + 0; +}) + ;; Return 1 if the operand is an indexed or indirect memory operand with an ;; AND -16 in it, used to recognize when we need to switch to Altivec loads ;; to realign loops instead of VSX (altivec silently ignores the bottom bits, @@ -991,9 +1125,16 @@ GET_MODE (XEXP (op, 0))), 1")))) +;; Return 1 if OP is a valid comparison operator for "cbranch" instructions. +;; If we're assuming that FP operations cannot generate user-visible traps, +;; then on e500 we can use the ordered-signaling instructions to implement +;; the unordered-quiet FP comparison predicates modulo a reversal. (define_predicate "rs6000_cbranch_operator" (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS") - (match_operand 0 "ordered_comparison_operator") + (if_then_else (match_test "flag_trapping_math") + (match_operand 0 "ordered_comparison_operator") + (ior (match_operand 0 "ordered_comparison_operator") + (match_code ("unlt,unle,ungt,unge")))) (match_operand 0 "comparison_operator"))) ;; Return 1 if OP is a comparison operation that is valid for an SCC insn -- diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index a545fe3e448..1a5a709751d 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -30,7 +30,7 @@ RS6000_BUILTIN_A -- ABS builtins RS6000_BUILTIN_D -- DST builtins RS6000_BUILTIN_E -- SPE EVSEL builtins. - RS6000_BUILTIN_P -- Altivec and VSX predicate builtins + RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins RS6000_BUILTIN_S -- SPE predicate builtins RS6000_BUILTIN_X -- special builtins @@ -301,6 +301,108 @@ | RS6000_BTC_SPECIAL), \ CODE_FOR_nothing) /* ICODE */ +/* ISA 2.07 (power8) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. 
*/ +#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P8V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P8V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +/* Crypto convenience macros. */ +#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_CRYPTO, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* SPE convenience macros. 
*/ #define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg) BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2) BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp) -BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp) +BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp) BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp) BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp) BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe) @@ -1132,6 +1234,139 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw") BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") +/* 1 argument VSX instructions added in ISA 2.07. */ +BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) +BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) + +/* 1 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) +BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw) +BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw) +BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2) +BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2) +BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2) +BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2) +BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2) +BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2) +BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2) +BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) +BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) + +/* 2 argument altivec instructions added in ISA 2.07. */ +BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) +BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) +BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) +BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3) +BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew) +BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow) +BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) +BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) +BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) +BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus) +BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) +BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) +BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) +BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) + +BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) +BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) +BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) +BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) +BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) + +BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) +BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) +BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) +BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) +BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) + +BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) +BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) +BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) +BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) +BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) + +/* Vector comparison instructions added in ISA 2.07. 
*/ +BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) +BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) +BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di) + +/* Vector comparison predicate instructions added in ISA 2.07. */ +BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) +BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) +BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) + +/* ISA 2.07 vector overloaded 1 argument functions. */ +BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw") +BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw") +BU_P8V_OVERLOAD_1 (VCLZ, "vclz") +BU_P8V_OVERLOAD_1 (VCLZB, "vclzb") +BU_P8V_OVERLOAD_1 (VCLZH, "vclzh") +BU_P8V_OVERLOAD_1 (VCLZW, "vclzw") +BU_P8V_OVERLOAD_1 (VCLZD, "vclzd") +BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt") +BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb") +BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth") +BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw") +BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd") +BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") + +/* ISA 2.07 vector overloaded 2 argument functions. */ +BU_P8V_OVERLOAD_2 (EQV, "eqv") +BU_P8V_OVERLOAD_2 (NAND, "nand") +BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") +BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") +BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") +BU_P8V_OVERLOAD_2 (VMINUD, "vminud") +BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew") +BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow") +BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") +BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") +BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") +BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VRLD, "vrld") +BU_P8V_OVERLOAD_2 (VSLD, "vsld") +BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") +BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") + + +/* 1 argument crypto functions. */ +BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) + +/* 2 argument crypto functions. */ +BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) +BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) +BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) +BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) +BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) + +/* 3 argument crypto functions. */ +BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) +BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) + +/* 2 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") + +/* 3 argument crypto overloaded functions. */ +BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") + + /* 3 argument paired floating point builtins. 
*/ BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4) BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a4f66ba8f1b..593b772ebd1 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X"); if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); if ((flags & OPTION_MASK_RECIP_PRECISION) != 0) @@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, } if ((flags & OPTION_MASK_VSX) != 0) rs6000_define_or_undefine_macro (define_p, "__VSX__"); + if ((flags & OPTION_MASK_P8_VECTOR) != 0) + rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__"); + if ((flags & OPTION_MASK_CRYPTO) != 0) + rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); /* options from the builtin masks. */ if ((bu_mask & RS6000_BTM_SPE) != 0) @@ -505,6 +511,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP, @@ -577,12 +585,24 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX, @@ -601,6 +621,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX, @@ -651,6 +675,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, @@ -937,6 +973,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP, @@ -975,6 +1015,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP, @@ -1021,6 +1065,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP, @@ -1418,6 +1466,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, 
RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP, @@ -1604,6 +1664,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP, @@ -1786,6 +1858,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM, @@ -1812,6 +1890,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHSS, 
ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS, @@ -1824,6 +1906,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, @@ -1844,6 +1928,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW, @@ -1868,6 +1956,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP, @@ -2032,6 +2124,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW, @@ -2056,6 +2152,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW, @@ -2196,6 +2296,18 @@ const struct 
altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, @@ -3327,6 +3439,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P, @@ -3372,11 +3498,455 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { 
ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + /* Power8 vector overloaded functions. */ + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 
RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, 
P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_bool_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_bool_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, + RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 
}, + + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS, + RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSRAD, 
P8V_BUILTIN_VSRD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 }, + + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_V16QI, 0, 0, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, + RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + + /* Crypto builtins. */ + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI }, + { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 } }; @@ -3650,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + 
SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); @@ -3729,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, TREE_USED (decl) = 1; TREE_TYPE (decl) = arg1_type; TREE_READONLY (decl) = TYPE_READONLY (arg1_type); - DECL_INITIAL (decl) = arg1; - stmt = build1 (DECL_EXPR, arg1_type, decl); - TREE_ADDRESSABLE (decl) = 1; - SET_EXPR_LOCATION (stmt, loc); - stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + if (c_dialect_cxx ()) + { + stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1, + NULL_TREE, NULL_TREE); + SET_EXPR_LOCATION (stmt, loc); + } + else + { + DECL_INITIAL (decl) = arg1; + stmt = build1 (DECL_EXPR, arg1_type, decl); + TREE_ADDRESSABLE (decl) = 1; + SET_EXPR_LOCATION (stmt, loc); + stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt); + } innerptrtype = build_pointer_type (arg1_inner_type); @@ -3824,7 +4412,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && (desc->op2 == RS6000_BTI_NOT_OPAQUE || rs6000_builtin_type_compatible (types[1], desc->op2)) && (desc->op3 == RS6000_BTI_NOT_OPAQUE - || rs6000_builtin_type_compatible (types[2], desc->op3))) + || rs6000_builtin_type_compatible (types[2], desc->op3)) + && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE) return altivec_build_resolved_builtin (args, n, desc); bad: diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 0564018b3f0..08346b61d17 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -28,7 +28,7 @@ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel, fre, fsqrt, etc. were no longer documented as optional. Group masks by server and embedded. */ -#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \ +#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \ | OPTION_MASK_CMPB \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_PPC_GFXOPT \ @@ -45,6 +45,14 @@ | OPTION_MASK_VSX \ | OPTION_MASK_VSX_TIMODE) +/* For now, don't provide an embedded version of ISA 2.07. */ +#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_QUAD_MEMORY) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -61,7 +69,9 @@ /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. 
*/ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ | OPTION_MASK_CMPB \ + | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ + | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ | OPTION_MASK_FPRND \ | OPTION_MASK_ISEL \ @@ -69,11 +79,14 @@ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ + | OPTION_MASK_P8_FUSION \ + | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_POWERPC64 \ | OPTION_MASK_PPC_GFXOPT \ | OPTION_MASK_PPC_GPOPT \ + | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ @@ -168,10 +181,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) -RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE) +RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h index fc843fd19ca..d528a4fd87a 100644 --- a/gcc/config/rs6000/rs6000-opts.h +++ b/gcc/config/rs6000/rs6000-opts.h @@ -30,21 +30,22 @@ /* Processor type. Order must match cpu attribute in MD file. */ enum processor_type { - PROCESSOR_RS64A, - PROCESSOR_MPCCORE, - PROCESSOR_PPC403, - PROCESSOR_PPC405, - PROCESSOR_PPC440, - PROCESSOR_PPC476, PROCESSOR_PPC601, PROCESSOR_PPC603, PROCESSOR_PPC604, PROCESSOR_PPC604e, PROCESSOR_PPC620, PROCESSOR_PPC630, + PROCESSOR_PPC750, PROCESSOR_PPC7400, PROCESSOR_PPC7450, + + PROCESSOR_PPC403, + PROCESSOR_PPC405, + PROCESSOR_PPC440, + PROCESSOR_PPC476, + PROCESSOR_PPC8540, PROCESSOR_PPC8548, PROCESSOR_PPCE300C2, @@ -53,15 +54,21 @@ enum processor_type PROCESSOR_PPCE500MC64, PROCESSOR_PPCE5500, PROCESSOR_PPCE6500, + PROCESSOR_POWER4, PROCESSOR_POWER5, PROCESSOR_POWER6, PROCESSOR_POWER7, + PROCESSOR_POWER8, + + PROCESSOR_RS64A, + PROCESSOR_MPCCORE, PROCESSOR_CELL, PROCESSOR_PPCA2, PROCESSOR_TITAN }; + /* FP processor type. */ enum fpu_type_t { @@ -131,11 +138,14 @@ enum rs6000_cmodel { CMODEL_LARGE }; -/* Describe which vector unit to use for a given machine mode. */ +/* Describe which vector unit to use for a given machine mode. The + VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and + P8_VECTOR are contiguous. 
*/ enum rs6000_vector { VECTOR_NONE, /* Type is not a vector or not supported */ VECTOR_ALTIVEC, /* Use altivec for vector processing */ VECTOR_VSX, /* Use VSX for vector processing */ + VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */ VECTOR_PAIRED, /* Use paired floating point for vectors */ VECTOR_SPE, /* Use SPE for vector processing */ VECTOR_OTHER /* Some other vector unit */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a41ed..25bad1bfb68 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, @@ -138,6 +141,7 @@ extern int rs6000_loop_align (rtx); #endif /* RTX_CODE */ #ifdef TREE_CODE +extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align); extern unsigned int rs6000_special_round_type_align (tree, unsigned int, unsigned int); extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e82b24e22ce..2331c5029c2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); don't link in rs6000-c.c, so we can't call it directly. */ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplfy register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + +/* Direct moves to/from vsx/gpr registers that need an additional register to + do the move. */ +static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES]; +static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES]; +static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES]; + /* Target cpu costs. */ @@ -831,6 +864,25 @@ struct processor_costs power7_cost = { 12, /* prefetch streams */ }; +/* Instruction costs on POWER8 processors. 
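The new rs6000_reg_type enum and the IS_STD_REG_TYPE / IS_FP_VECT_REG_TYPE macros above only work because GPR_REG_TYPE through FPR_REG_TYPE are declared consecutively, letting IN_RANGE fold several equality tests into a single unsigned comparison. A rough standalone sketch of that idea (illustrative names only, not code from the patch):

/* Sketch: range check over consecutive enumerators, as IN_RANGE does.  */
enum reg_type_sketch { NO_T, PSEUDO_T, GPR_T, VSX_T, ALTIVEC_T, FPR_T, SPR_T };

static int
is_std_reg_type_sketch (enum reg_type_sketch t)
{
  /* One unsigned compare instead of four equality tests.  */
  return (unsigned) t - (unsigned) GPR_T <= (unsigned) FPR_T - (unsigned) GPR_T;
}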
*/ +static const +struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ +}; + /* Instruction costs on POWER A2 processors. */ static const struct processor_costs ppca2_cost = { @@ -1023,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1547,6 +1606,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + /* PTImode can only go in GPRs. Quad word memory operations require even/odd + register combinations, and use PTImode where we need to deal with quad + word memory operations. Don't allow quad words in the argument or frame + pointer registers, just registers 0..31. */ + if (mode == PTImode) + return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) + && ((regno & 1) == 0)); + /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register and an Altivec register. */ @@ -1559,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return ALTIVEC_REGNO_P (last_regno); } - /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode - can only go in GPRs. */ + /* Allow TImode in all VSX registers if the user asked for it. 
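The PTImode case added above amounts to requiring an even/odd GPR pair that lies entirely in r0..r31, since the quad-word memory instructions operate on such pairs and the argument/frame pointer registers are excluded. A simplified restatement with a hypothetical helper (illustration only, assuming 64-bit mode where PTImode occupies two GPRs; not code from the patch):

/* Sketch of the even/odd pairing rule enforced for quad-word GPR access.  */
static int
ptimode_gpr_pair_ok (int first_regno)
{
  int last_regno = first_regno + 1;   /* second register of the pair */
  return first_regno >= 0 && last_regno <= 31 && (first_regno & 1) == 0;
}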
*/ if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) return 1; @@ -1678,6 +1745,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) comma = ""; } + len += fprintf (stderr, "%sreg-class = %s", comma, + reg_class_names[(int)rs6000_regno_regclass[r]]); + comma = ", "; + + if (len > 70) + { + fprintf (stderr, ",\n\t"); + comma = ""; + } + fprintf (stderr, "%sregno = %d\n", comma, r); } } @@ -1710,6 +1787,7 @@ rs6000_debug_reg_global (void) "none", "altivec", "vsx", + "p8_vector", "paired", "spe", "other" @@ -1802,8 +1880,11 @@ rs6000_debug_reg_global (void) "wf reg_class = %s\n" "wg reg_class = %s\n" "wl reg_class = %s\n" + "wm reg_class = %s\n" + "wr reg_class = %s\n" "ws reg_class = %s\n" "wt reg_class = %s\n" + "wv reg_class = %s\n" "wx reg_class = %s\n" "wz reg_class = %s\n" "\n", @@ -1815,8 +1896,11 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); @@ -2050,6 +2134,10 @@ rs6000_debug_reg_global (void) if (targetm.lra_p ()) fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + if (TARGET_P8_FUSION) + fprintf (stderr, DEBUG_FMT_S, "p8 fusion", + (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero"); + fprintf (stderr, DEBUG_FMT_S, "plt-format", TARGET_SECURE_PLT ? "secure" : "bss"); fprintf (stderr, DEBUG_FMT_S, "struct-return", @@ -2105,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else + { + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; + } + /* Precalculate vector information, this must be set up before the rs6000_hard_regno_nregs_internal below. 
*/ for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -2177,12 +2295,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract. - Altivec doesn't have 64-bit support. */ + /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to + do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ if (TARGET_VSX) { rs6000_vector_mem[V2DImode] = VECTOR_VSX; - rs6000_vector_unit[V2DImode] = VECTOR_NONE; + rs6000_vector_unit[V2DImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; } @@ -2240,13 +2359,30 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWAX) rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; + if (TARGET_DIRECT_MOVE) + rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; + + if (TARGET_POWERPC64) + rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; + + if (TARGET_P8_VECTOR) + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + if (TARGET_STFIWX) rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; if (TARGET_LFIWZX) rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; - /* Set up the reload helper functions. */ + /* Setup the direct move combinations. */ + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + reload_fpr_gpr[m] = CODE_FOR_nothing; + reload_gpr_vsx[m] = CODE_FOR_nothing; + reload_vsx_gpr[m] = CODE_FOR_nothing; + } + + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) @@ -2270,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store; rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load; } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti; + reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df; + reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di; + reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf; + reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si; + reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi; + reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi; + reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf; + + reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti; + reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df; + reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di; + reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf; + reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si; + reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi; + reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi; + reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf; + } + else + { + reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi; + reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd; + reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf; + } + } } else { @@ -2297,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) 
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store; @@ -2520,16 +2699,18 @@ darwin_rs6000_override_options (void) HOST_WIDE_INT rs6000_builtin_mask_calculate (void) { - return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) - | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) - | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) - | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) - | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) - | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) - | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) - | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) - | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) - | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)); + return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) + | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) + | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) + | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) + | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) + | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) + | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) + | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) + | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) + | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) + | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)); } /* Override command line options. Mostly we process the processor type and @@ -2803,7 +2984,9 @@ rs6000_option_override_internal (bool global_init_p) /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-<option> to disable the code. 
*/ - if (TARGET_VSX) + if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) + rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit); + else if (TARGET_VSX) rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit); else if (TARGET_POPCNTD) rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit); @@ -2818,6 +3001,34 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_ALTIVEC) rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit); + if (TARGET_CRYPTO && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) + error ("-mcrypto requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; + } + + if (TARGET_DIRECT_MOVE && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) + error ("-mdirect-move requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; + } + + if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -maltivec"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + + if (TARGET_P8_VECTOR && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) + error ("-mpower8-vector requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; + } + if (TARGET_VSX_TIMODE && !TARGET_VSX) { if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) @@ -2825,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; } + /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, + silently turn off quad memory mode. */ + if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) + warning (0, N_("-mquad-memory requires 64-bit mode")); + + rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -2951,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p) /* Place FP constants in the constant pool instead of TOC if section anchors enabled. 
*/ - if (flag_section_anchors) + if (flag_section_anchors + && !global_options_set.x_TARGET_NO_FP_IN_TOC) TARGET_NO_FP_IN_TOC = 1; if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) @@ -3019,16 +3241,19 @@ rs6000_option_override_internal (bool global_init_p) && rs6000_cpu != PROCESSOR_POWER5 && rs6000_cpu != PROCESSOR_POWER6 && rs6000_cpu != PROCESSOR_POWER7 + && rs6000_cpu != PROCESSOR_POWER8 && rs6000_cpu != PROCESSOR_PPCA2 && rs6000_cpu != PROCESSOR_CELL && rs6000_cpu != PROCESSOR_PPC476); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 - || rs6000_cpu == PROCESSOR_POWER7); + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8); rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 || rs6000_cpu == PROCESSOR_PPCE5500 @@ -3272,6 +3497,10 @@ rs6000_option_override_internal (bool global_init_p) rs6000_cost = &power7_cost; break; + case PROCESSOR_POWER8: + rs6000_cost = &power8_cost; + break; + case PROCESSOR_PPCA2: rs6000_cost = &ppca2_cost; break; @@ -3444,7 +3673,8 @@ rs6000_loop_align (rtx label) && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7)) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8)) return 5; else return align_loops_log; @@ -3983,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); switch (fn) { + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZLL: + case BUILT_IN_CLZL: + case BUILT_IN_CLZ: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VCLZD]; + } + break; case BUILT_IN_COPYSIGN: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -3998,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out, if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)) return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; break; + case BUILT_IN_POPCOUNTIMAX: + case BUILT_IN_POPCOUNTLL: + case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNT: + if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n) + { + if (out_mode == QImode && out_n == 16) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB]; + else if (out_mode == HImode && out_n == 8) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH]; + else if (out_mode == SImode && out_n == 4) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW]; + else if (out_mode == DImode && out_n == 2) + return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD]; + } + break; case BUILT_IN_SQRT: if (VECTOR_UNIT_VSX_P (V2DFmode) && out_mode == DFmode && out_n == 2 @@ -4395,7 +4657,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) bitsize = GET_MODE_BITSIZE (inner); mask = GET_MODE_MASK (inner); - val = const_vector_elt_as_int (op, nunits - 1); + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); splat_val = val; msb_val = val > 0 ? 
0 : -1; @@ -4435,7 +4697,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) for (i = 0; i < nunits - 1; ++i) { HOST_WIDE_INT desired_val; - if (((i + 1) & (step - 1)) == 0) + if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0) desired_val = val; else desired_val = msb_val; @@ -4520,13 +4782,13 @@ gen_easy_altivec_constant (rtx op) { enum machine_mode mode = GET_MODE (op); int nunits = GET_MODE_NUNITS (mode); - rtx last = CONST_VECTOR_ELT (op, nunits - 1); + rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); unsigned step = nunits / 4; unsigned copies = 1; /* Start with a vspltisw. */ if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last)); + return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); /* Then try with a vspltish. */ if (step == 1) @@ -4535,7 +4797,7 @@ gen_easy_altivec_constant (rtx op) step >>= 1; if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last)); + return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); /* And finally a vspltisb. */ if (step == 1) @@ -4544,7 +4806,7 @@ gen_easy_altivec_constant (rtx op) step >>= 1; if (vspltis_constant (op, step, copies)) - return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last)); + return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); gcc_unreachable (); } @@ -4856,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals) { rtx freg = gen_reg_rtx (V4SFmode); rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0)); + rtx cvt = ((TARGET_XSCVDPSPN) + ? gen_vsx_xscvdpspn_scalar (freg, sreg) + : gen_vsx_xscvdpsp_scalar (freg, sreg)); - emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg)); + emit_insn (cvt); emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx)); } else @@ -5119,6 +5384,48 @@ invalid_e500_subreg (rtx op, enum machine_mode mode) return false; } +/* Return alignment of TYPE. Existing alignment is ALIGN. HOW + selects whether the alignment is abi mandated, optional, or + both abi and optional alignment. */ + +unsigned int +rs6000_data_alignment (tree type, unsigned int align, enum data_align how) +{ + if (how != align_opt) + { + if (TREE_CODE (type) == VECTOR_TYPE) + { + if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type))) + || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))) + { + if (align < 64) + align = 64; + } + else if (align < 128) + align = 128; + } + else if (TARGET_E500_DOUBLE + && TREE_CODE (type) == REAL_TYPE + && TYPE_MODE (type) == DFmode) + { + if (align < 64) + align = 64; + } + } + + if (how != align_abi) + { + if (TREE_CODE (type) == ARRAY_TYPE + && TYPE_MODE (TREE_TYPE (type)) == QImode) + { + if (align < BITS_PER_WORD) + align = BITS_PER_WORD; + } + } + + return align; +} + /* AIX increases natural record alignment to doubleword if the first field is an FP double while the FP fields remain word aligned. */ @@ -5240,6 +5547,72 @@ gpr_or_gpr_p (rtx op0, rtx op1) || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); } +/* Return true if this is a move direct operation between GPR registers and + floating point/VSX registers. */ + +bool +direct_move_p (rtx op0, rtx op1) +{ + int regno0, regno1; + + if (!REG_P (op0) || !REG_P (op1)) + return false; + + if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) + return false; + + regno0 = REGNO (op0); + regno1 = REGNO (op1); + if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) + return false; + + if (INT_REGNO_P (regno0)) + return (TARGET_DIRECT_MOVE) ? 
VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); + + else if (INT_REGNO_P (regno1)) + { + if (TARGET_MFPGPR && FP_REGNO_P (regno0)) + return true; + + else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) + return true; + } + + return false; +} + +/* Return true if this is a load or store quad operation. */ + +bool +quad_load_store_p (rtx op0, rtx op1) +{ + bool ret; + + if (!TARGET_QUAD_MEMORY) + ret = false; + + else if (REG_P (op0) && MEM_P (op1)) + ret = (quad_int_reg_operand (op0, GET_MODE (op0)) + && quad_memory_operand (op1, GET_MODE (op1)) + && !reg_overlap_mentioned_p (op0, op1)); + + else if (MEM_P (op0) && REG_P (op1)) + ret = (quad_memory_operand (op0, GET_MODE (op0)) + && quad_int_reg_operand (op1, GET_MODE (op1))); + + else + ret = false; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n========== quad_load_store, return %s\n", + ret ? "true" : "false"); + debug_rtx (gen_rtx_SET (VOIDmode, op0, op1)); + } + + return ret; +} + /* Given an address, return a constant offset term if one exists. */ static rtx @@ -5375,91 +5748,102 @@ virtual_stack_registers_memory_p (rtx op) && regnum <= LAST_VIRTUAL_POINTER_REGISTER); } -/* Return true if memory accesses to OP are known to never straddle - a 32k boundary. */ +/* Return true if a MODE sized memory accesses to OP plus OFFSET + is known to not straddle a 32k boundary. */ static bool offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, enum machine_mode mode) { tree decl, type; - unsigned HOST_WIDE_INT dsize, dalign; + unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; if (GET_CODE (op) != SYMBOL_REF) return false; + dsize = GET_MODE_SIZE (mode); decl = SYMBOL_REF_DECL (op); if (!decl) { - if (GET_MODE_SIZE (mode) == 0) + if (dsize == 0) return false; /* -fsection-anchors loses the original SYMBOL_REF_DECL when replacing memory addresses with an anchor plus offset. We could find the decl by rummaging around in the block->objects VEC for the given offset but that seems like too much work. */ - dalign = 1; + dalign = BITS_PER_UNIT; if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) && SYMBOL_REF_ANCHOR_P (op) && SYMBOL_REF_BLOCK (op) != NULL) { struct object_block *block = SYMBOL_REF_BLOCK (op); - HOST_WIDE_INT lsb, mask; - /* Given the alignment of the block.. */ dalign = block->alignment; - mask = dalign / BITS_PER_UNIT - 1; - - /* ..and the combined offset of the anchor and any offset - to this block object.. */ offset += SYMBOL_REF_BLOCK_OFFSET (op); - lsb = offset & -offset; + } + else if (CONSTANT_POOL_ADDRESS_P (op)) + { + /* It would be nice to have get_pool_align().. */ + enum machine_mode cmode = get_pool_mode (op); - /* ..find how many bits of the alignment we know for the - object. */ - mask &= lsb - 1; - dalign = mask + 1; + dalign = GET_MODE_ALIGNMENT (cmode); } - return dalign >= GET_MODE_SIZE (mode); } - - if (DECL_P (decl)) + else if (DECL_P (decl)) { - if (TREE_CODE (decl) == FUNCTION_DECL) - return true; + dalign = DECL_ALIGN (decl); - if (!DECL_SIZE_UNIT (decl)) - return false; + if (dsize == 0) + { + /* Allow BLKmode when the entire object is known to not + cross a 32k boundary. 
*/ + if (!DECL_SIZE_UNIT (decl)) + return false; - if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) - return false; + if (!host_integerp (DECL_SIZE_UNIT (decl), 1)) + return false; - dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); - if (dsize > 32768) - return false; + dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + if (dsize > 32768) + return false; - dalign = DECL_ALIGN_UNIT (decl); - return dalign >= dsize; + return dalign / BITS_PER_UNIT >= dsize; + } } + else + { + type = TREE_TYPE (decl); - type = TREE_TYPE (decl); + dalign = TYPE_ALIGN (type); + if (CONSTANT_CLASS_P (decl)) + dalign = CONSTANT_ALIGNMENT (decl, dalign); + else + dalign = DATA_ALIGNMENT (decl, dalign); - if (TREE_CODE (decl) == STRING_CST) - dsize = TREE_STRING_LENGTH (decl); - else if (TYPE_SIZE_UNIT (type) - && host_integerp (TYPE_SIZE_UNIT (type), 1)) - dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); - else - return false; - if (dsize > 32768) - return false; + if (dsize == 0) + { + /* BLKmode, check the entire object. */ + if (TREE_CODE (decl) == STRING_CST) + dsize = TREE_STRING_LENGTH (decl); + else if (TYPE_SIZE_UNIT (type) + && host_integerp (TYPE_SIZE_UNIT (type), 1)) + dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1); + else + return false; + if (dsize > 32768) + return false; + + return dalign / BITS_PER_UNIT >= dsize; + } + } + + /* Find how many bits of the alignment we know for this access. */ + mask = dalign / BITS_PER_UNIT - 1; + lsb = offset & -offset; + mask &= lsb - 1; + dalign = mask + 1; - dalign = TYPE_ALIGN (type); - if (CONSTANT_CLASS_P (decl)) - dalign = CONSTANT_ALIGNMENT (decl, dalign); - else - dalign = DATA_ALIGNMENT (decl, dalign); - dalign /= BITS_PER_UNIT; return dalign >= dsize; } @@ -5747,8 +6131,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) return force_reg (Pmode, XEXP (x, 0)); + /* For TImode with load/store quad, restrict addresses to just a single + pointer, so it works with both GPRs and VSX registers. */ /* Make sure both operands are registers. */ - else if (GET_CODE (x) == PLUS) + else if (GET_CODE (x) == PLUS + && (mode != TImode || !TARGET_QUAD_MEMORY)) return gen_rtx_PLUS (Pmode, force_reg (Pmode, XEXP (x, 0)), force_reg (Pmode, XEXP (x, 1))); @@ -6405,7 +6792,6 @@ use_toc_relative_ref (rtx sym) && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), get_pool_mode (sym))) || (TARGET_CMODEL == CMODEL_MEDIUM - && !CONSTANT_POOL_ADDRESS_P (sym) && SYMBOL_REF_LOCAL_P (sym))); } @@ -6703,6 +7089,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; + /* For TImode, if we have load/store quad, only allow register indirect + addresses. This will allow the values to go in either GPRs or VSX + registers without reloading. The vector types would tend to go into VSX + registers, so we allow REG+REG, while TImode seems somewhat split, in that + some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY) + return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! reg_ok_strict && reg_offset_p @@ -9215,20 +9608,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode, if (! 
no_rtl && first_reg_offset < GP_ARG_NUM_REG && cfun->va_list_gpr_size) { - int nregs = GP_ARG_NUM_REG - first_reg_offset; + int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset; if (va_list_gpr_counter_field) - { - /* V4 va_list_gpr_size counts number of registers needed. */ - if (nregs > cfun->va_list_gpr_size) - nregs = cfun->va_list_gpr_size; - } + /* V4 va_list_gpr_size counts number of registers needed. */ + n_gpr = cfun->va_list_gpr_size; else - { - /* char * va_list instead counts number of bytes needed. */ - if (nregs > cfun->va_list_gpr_size / reg_size) - nregs = cfun->va_list_gpr_size / reg_size; - } + /* char * va_list instead counts number of bytes needed. */ + n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size; + + if (nregs > n_gpr) + nregs = n_gpr; mem = gen_rtx_MEM (BLKmode, plus_constant (Pmode, save_area, @@ -10578,6 +10968,27 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) return const0_rtx; } } + else if (icode == CODE_FOR_crypto_vshasigmaw + || icode == CODE_FOR_crypto_vshasigmad) + { + /* Check whether the 2nd and 3rd arguments are integer constants and in + range and prepare arguments. */ + STRIP_NOPS (arg1); + if (TREE_CODE (arg1) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1)) + { + error ("argument 2 must be 0 or 1"); + return const0_rtx; + } + + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15)) + { + error ("argument 3 must be in the range 0..15"); + return const0_rtx; + } + } if (target == 0 || GET_MODE (target) != tmode @@ -12268,6 +12679,10 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE); + tree int_ftype_int_v2di_v2di + = build_function_type_list (integer_type_node, + integer_type_node, V2DI_type_node, + V2DI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -12350,6 +12765,8 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); tree v4si_ftype_v4si = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_v8hi @@ -12485,6 +12902,9 @@ altivec_init_builtins (void) case VOIDmode: type = int_ftype_int_opaque_opaque; break; + case V2DImode: + type = int_ftype_int_v2di_v2di; + break; case V4SImode: type = int_ftype_int_v4si_v4si; break; @@ -12518,6 +12938,9 @@ altivec_init_builtins (void) switch (mode0) { + case V2DImode: + type = v2di_ftype_v2di; + break; case V4SImode: type = v4si_ftype_v4si; break; @@ -12723,11 +13146,27 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, are type correct. */ switch (builtin) { + /* unsigned 1 argument functions. */ + case CRYPTO_BUILTIN_VSBOX: + case P8V_BUILTIN_VGBBD: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + break; + /* unsigned 2 argument functions. 
*/ case ALTIVEC_BUILTIN_VMULEUB_UNS: case ALTIVEC_BUILTIN_VMULEUH_UNS: case ALTIVEC_BUILTIN_VMULOUB_UNS: case ALTIVEC_BUILTIN_VMULOUH_UNS: + case CRYPTO_BUILTIN_VCIPHER: + case CRYPTO_BUILTIN_VCIPHERLAST: + case CRYPTO_BUILTIN_VNCIPHER: + case CRYPTO_BUILTIN_VNCIPHERLAST: + case CRYPTO_BUILTIN_VPMSUMB: + case CRYPTO_BUILTIN_VPMSUMH: + case CRYPTO_BUILTIN_VPMSUMW: + case CRYPTO_BUILTIN_VPMSUMD: + case CRYPTO_BUILTIN_VPMSUM: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12750,6 +13189,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, case VSX_BUILTIN_XXSEL_8HI_UNS: case VSX_BUILTIN_XXSEL_4SI_UNS: case VSX_BUILTIN_XXSEL_2DI_UNS: + case CRYPTO_BUILTIN_VPERMXOR: + case CRYPTO_BUILTIN_VPERMXOR_V2DI: + case CRYPTO_BUILTIN_VPERMXOR_V4SI: + case CRYPTO_BUILTIN_VPERMXOR_V8HI: + case CRYPTO_BUILTIN_VPERMXOR_V16QI: + case CRYPTO_BUILTIN_VSHASIGMAW: + case CRYPTO_BUILTIN_VSHASIGMAD: + case CRYPTO_BUILTIN_VSHASIGMA: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; @@ -12891,8 +13338,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", + d->name); + + continue; + } type = builtin_function_type (insn_data[icode].operand[0].mode, insn_data[icode].operand[1].mode, @@ -12931,8 +13393,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -12993,8 +13470,23 @@ rs6000_common_init_builtins (void) else { enum insn_code icode = d->icode; - if (d->name == 0 || icode == CODE_FOR_nothing) - continue; + if (d->name == 0) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n", + (long unsigned)i); + + continue; + } + + if (icode == CODE_FOR_nothing) + { + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", + d->name); + + continue; + } mode0 = insn_data[icode].operand[0].mode; mode1 = insn_data[icode].operand[1].mode; @@ -13747,29 +14239,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) return NULL_TREE; } -enum reload_reg_type { - GPR_REGISTER_TYPE, - VECTOR_REGISTER_TYPE, - OTHER_REGISTER_TYPE -}; +/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work + on traditional floating point registers, and the VMRGOW/VMRGEW instructions + only work on the traditional altivec registers, note if an altivec register + was choosen. 
*/ -static enum reload_reg_type -rs6000_reload_register_type (enum reg_class rclass) +static enum rs6000_reg_type +register_to_reg_type (rtx reg, bool *is_altivec) { - switch (rclass) + HOST_WIDE_INT regno; + enum reg_class rclass; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (!REG_P (reg)) + return NO_REG_TYPE; + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER) { - case GENERAL_REGS: - case BASE_REGS: - return GPR_REGISTER_TYPE; + if (!lra_in_progress && !reload_in_progress && !reload_completed) + return PSEUDO_REG_TYPE; - case FLOAT_REGS: - case ALTIVEC_REGS: - case VSX_REGS: - return VECTOR_REGISTER_TYPE; + regno = true_regnum (reg); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + return PSEUDO_REG_TYPE; + } - default: - return OTHER_REGISTER_TYPE; + gcc_assert (regno >= 0); + + if (is_altivec && ALTIVEC_REGNO_P (regno)) + *is_altivec = true; + + rclass = rs6000_regno_regclass[regno]; + return reg_class_to_reg_type[(int)rclass]; +} + +/* Helper function for rs6000_secondary_reload to return true if a move to a + different register classe is really a simple move. */ + +static bool +rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode) +{ + int size; + + /* Add support for various direct moves available. In this function, we only + look at cases where we don't need any extra registers, and one or more + simple move insns are issued. At present, 32-bit integers are not allowed + in FPR/VSX registers. Single precision binary floating is not a simple + move because we need to convert to the single precision memory layout. + The 4-byte SDmode can be moved. */ + size = GET_MODE_SIZE (mode); + if (TARGET_DIRECT_MOVE + && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 + && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) + || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) + || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + return false; +} + +/* Power8 helper function for rs6000_secondary_reload, handle all of the + special direct moves that involve allocating an extra register, return the + insn code of the helper function if there is such a function or + CODE_FOR_nothing if not. */ + +static bool +rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + bool ret = false; + enum insn_code icode = CODE_FOR_nothing; + int cost = 0; + int size = GET_MODE_SIZE (mode); + + if (TARGET_POWERPC64) + { + if (size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. 
*/ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (mode == SFmode) + { + if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* xscvdpspn, mfvsrd, and. */ + icode = reload_gpr_vsx[(int)mode]; + } + + else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 2; /* mtvsrz, xscvspdpn. */ + icode = reload_vsx_gpr[(int)mode]; + } + } } + + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (!TARGET_POWERPC64 && size == 8) + { + /* Handle moving 64-bit values from GPRs to floating point registers on + power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit + values back together. Altivec register classes must be handled + specially since a different instruction is used, and the secondary + reload support requires a single instruction class in the scratch + register constraint. However, right now TFmode is not allowed in + Altivec registers, so the pattern will never match. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) + { + cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ + icode = reload_fpr_gpr[(int)mode]; + } + } + + if (icode != CODE_FOR_nothing) + { + ret = true; + if (sri) + { + sri->icode = icode; + sri->extra_cost = cost; + } + } + + return ret; +} + +/* Return whether a move between two register classes can be done either + directly (simple move) or via a pattern that uses a single extra temporary + (using power8's direct move in this case. */ + +static bool +rs6000_secondary_reload_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + /* Fall back to load/store reloads if either type is not a register. */ + if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) + return false; + + /* If we haven't allocated registers yet, assume the move can be done for the + standard register types. */ + if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) + || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) + || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) + return true; + + /* Moves to the same set of registers is a simple move for non-specialized + registers. */ + if (to_type == from_type && IS_STD_REG_TYPE (to_type)) + return true; + + /* Check whether a simple move can be done directly. */ + if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) + { + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + return true; + } + + /* Now check if we can do it in a few steps. 
*/ + return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, + altivec_p); } /* Inform reload about cases where moving X with a mode MODE to a register in @@ -13795,11 +14484,32 @@ rs6000_secondary_reload (bool in_p, bool default_p = false; sri->icode = CODE_FOR_nothing; - - /* Convert vector loads and stores into gprs to use an additional base - register. */ icode = rs6000_vector_reload[mode][in_p != false]; - if (icode != CODE_FOR_nothing) + + if (REG_P (x) || register_operand (x, mode)) + { + enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; + bool altivec_p = (rclass == ALTIVEC_REGS); + enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); + + if (!in_p) + { + enum rs6000_reg_type exchange = to_type; + to_type = from_type; + from_type = exchange; + } + + if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, + altivec_p)) + { + icode = (enum insn_code)sri->icode; + default_p = false; + ret = NO_REGS; + } + } + + /* Handle vector moves with reload helper functions. */ + if (ret == ALL_REGS && icode != CODE_FOR_nothing) { ret = NO_REGS; sri->icode = CODE_FOR_nothing; @@ -13811,12 +14521,21 @@ rs6000_secondary_reload (bool in_p, /* Loads to and stores from gprs can do reg+offset, and wouldn't need an extra register in that case, but it would need an extra - register if the addressing is reg+reg or (reg+reg)&(-16). */ + register if the addressing is reg+reg or (reg+reg)&(-16). Special + case load/store quad. */ if (rclass == GENERAL_REGS || rclass == BASE_REGS) { - if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) + if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY + && GET_MODE_SIZE (mode) == 16 + && quad_memory_operand (x, mode)) + { + sri->icode = icode; + sri->extra_cost = 2; + } + + else if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (PTImode, addr, + false, true)) { sri->icode = icode; /* account for splitting the loads, and converting the @@ -13830,7 +14549,7 @@ rs6000_secondary_reload (bool in_p, else if ((rclass == FLOAT_REGS || rclass == NO_REGS) && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && (legitimate_indirect_address_p (addr, false) - || legitimate_indirect_address_p (XEXP (addr, 0), false) + || legitimate_indirect_address_p (addr, false) || rs6000_legitimate_offset_address_p (mode, addr, false, true))) @@ -13882,12 +14601,12 @@ rs6000_secondary_reload (bool in_p, else { enum reg_class xclass = REGNO_REG_CLASS (regno); - enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass); - enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass); + enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass]; + enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass]; /* If memory is needed, use default_secondary_reload to create the stack slot. 
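The simple-move/direct-move classification above is what lets 64-bit scalar data travel between GPRs and VSX registers without a trip through memory on power8 (the mtvsrd/mfvsrd sequences mentioned in the comments). As a usage-level illustration, ordinary code like the following bit-pattern extraction can then compile to a direct register move at -O2 -mcpu=power8 instead of a stack store and reload; this example is not from the patch itself:

#include <stdint.h>
#include <string.h>

/* Return the raw IEEE-754 bits of a double.  */
uint64_t
double_bits (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);   /* well-defined bit copy */
  return u;
}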
*/ - if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE) + if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1)) default_p = true; else ret = NO_REGS; @@ -13897,7 +14616,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { @@ -13936,7 +14655,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (!TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { @@ -14499,42 +15218,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) set and vice versa. */ static bool -rs6000_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - if (class1 == class2) - return false; - - /* Under VSX, there are 3 register classes that values could be in (VSX_REGS, - ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy - between these classes. But we need memory for other things that can go in - FLOAT_REGS like SFmode. */ - if (TARGET_VSX - && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode)) - && (class1 == VSX_REGS || class1 == ALTIVEC_REGS - || class1 == FLOAT_REGS)) - return (class2 != VSX_REGS && class2 != ALTIVEC_REGS - && class2 != FLOAT_REGS); + enum rs6000_reg_type from_type, to_type; + bool altivec_p = ((from_class == ALTIVEC_REGS) + || (to_class == ALTIVEC_REGS)); - if (class1 == VSX_REGS || class2 == VSX_REGS) - return true; + /* If a simple/direct move is available, we don't need secondary memory */ + from_type = reg_class_to_reg_type[(int)from_class]; + to_type = reg_class_to_reg_type[(int)to_class]; - if (class1 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; - - if (class2 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + if (rs6000_secondary_reload_move (to_type, from_type, mode, + (secondary_reload_info *)0, altivec_p)) + return false; - if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS) + /* If we have a floating point or vector register class, we need to use + memory to transfer the data. */ + if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) return true; return false; @@ -14542,17 +15244,19 @@ rs6000_secondary_memory_needed (enum reg_class class1, /* Debug version of rs6000_secondary_memory_needed. */ static bool -rs6000_debug_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_debug_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - bool ret = rs6000_secondary_memory_needed (class1, class2, mode); + bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); fprintf (stderr, - "rs6000_secondary_memory_needed, return: %s, class1 = %s, " - "class2 = %s, mode = %s\n", - ret ? "true" : "false", reg_class_names[class1], - reg_class_names[class2], GET_MODE_NAME (mode)); + "rs6000_secondary_memory_needed, return: %s, from_class = %s, " + "to_class = %s, mode = %s\n", + ret ? 
"true" : "false", + reg_class_names[from_class], + reg_class_names[to_class], + GET_MODE_NAME (mode)); return ret; } @@ -14758,6 +15462,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from, return ret; } +/* Return a string to do a move operation of 128 bits of data. */ + +const char * +rs6000_output_move_128bit (rtx operands[]) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + enum machine_mode mode = GET_MODE (dest); + int dest_regno; + int src_regno; + bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p; + + if (REG_P (dest)) + { + dest_regno = REGNO (dest); + dest_gpr_p = INT_REGNO_P (dest_regno); + dest_fp_p = FP_REGNO_P (dest_regno); + dest_av_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_av_p; + } + else + { + dest_regno = -1; + dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false; + } + + if (REG_P (src)) + { + src_regno = REGNO (src); + src_gpr_p = INT_REGNO_P (src_regno); + src_fp_p = FP_REGNO_P (src_regno); + src_av_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_av_p; + } + else + { + src_regno = -1; + src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false; + } + + /* Register moves. */ + if (dest_regno >= 0 && src_regno >= 0) + { + if (dest_gpr_p) + { + if (src_gpr_p) + return "#"; + + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) + return "#"; + } + + else if (TARGET_VSX && dest_vsx_p) + { + if (src_vsx_p) + return "xxlor %x0,%x1,%x1"; + + else if (TARGET_DIRECT_MOVE && src_gpr_p) + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p && src_av_p) + return "vor %0,%1,%1"; + + else if (dest_fp_p && src_fp_p) + return "#"; + } + + /* Loads. */ + else if (dest_regno >= 0 && MEM_P (src)) + { + if (dest_gpr_p) + { + if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 + && quad_memory_operand (src, mode) + && !reg_overlap_mentioned_p (dest, src)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "lvx %0,%y1"; + + else if (TARGET_VSX && dest_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "lxvw4x %x0,%y1"; + else + return "lxvd2x %x0,%y1"; + } + + else if (TARGET_ALTIVEC && dest_av_p) + return "lvx %0,%y1"; + + else if (dest_fp_p) + return "#"; + } + + /* Stores. */ + else if (src_regno >= 0 && MEM_P (dest)) + { + if (src_gpr_p) + { + if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 + && quad_memory_operand (dest, mode)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && src_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "stvx %1,%y0"; + + else if (TARGET_VSX && src_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "stxvw4x %x1,%y0"; + else + return "stxvd2x %x1,%y0"; + } + + else if (TARGET_ALTIVEC && src_av_p) + return "stvx %1,%y0"; + + else if (src_fp_p) + return "#"; + } + + /* Constants. 
*/ + else if (dest_regno >= 0 + && (GET_CODE (src) == CONST_INT + || GET_CODE (src) == CONST_DOUBLE + || GET_CODE (src) == CONST_VECTOR)) + { + if (dest_gpr_p) + return "#"; + + else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) + return "xxlxor %x0,%x0,%x0"; + + else if (TARGET_ALTIVEC && dest_av_p) + return output_vec_const_move (operands); + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n===== Bad 128 bit move:\n"); + debug_rtx (gen_rtx_SET (VOIDmode, dest, src)); + } + + gcc_unreachable (); +} + + /* Given a comparison operation, return the bit number in CCR to test. We know this is a valid comparison. @@ -15474,11 +16342,6 @@ print_operand (FILE *file, rtx x, int code) TOCs and the like. */ gcc_assert (GET_CODE (x) == SYMBOL_REF); - /* Mark the decl as referenced so that cgraph will output the - function. */ - if (SYMBOL_REF_DECL (x)) - mark_decl_referenced (SYMBOL_REF_DECL (x)); - /* For macho, check to see if we need a stub. */ if (TARGET_MACHO) { @@ -15887,16 +16750,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) { rtx cmp, or_result, compare_result2; enum machine_mode op_mode = GET_MODE (op0); + bool reverse_p; if (op_mode == VOIDmode) op_mode = GET_MODE (op1); + /* First reverse the condition codes that aren't directly supported. */ + switch (code) + { + case NE: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + code = reverse_condition_maybe_unordered (code); + reverse_p = true; + break; + + case EQ: + case LT: + case LE: + case GT: + case GE: + reverse_p = false; + break; + + default: + gcc_unreachable (); + } + /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only. This explains the following mess. */ switch (code) { - case EQ: case UNEQ: case NE: case LTGT: + case EQ: switch (op_mode) { case SFmode: @@ -15922,7 +16810,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case GT: case GTU: case UNGT: case UNGE: case GE: case GEU: + case GT: + case GE: switch (op_mode) { case SFmode: @@ -15948,7 +16837,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) } break; - case LT: case LTU: case UNLT: case UNLE: case LE: case LEU: + case LT: + case LE: switch (op_mode) { case SFmode: @@ -15973,24 +16863,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) gcc_unreachable (); } break; + default: gcc_unreachable (); } /* Synthesize LE and GE from LT/GT || EQ. */ - if (code == LE || code == GE || code == LEU || code == GEU) + if (code == LE || code == GE) { emit_insn (cmp); - switch (code) - { - case LE: code = LT; break; - case GE: code = GT; break; - case LEU: code = LT; break; - case GEU: code = GT; break; - default: gcc_unreachable (); - } - compare_result2 = gen_reg_rtx (CCFPmode); /* Do the EQ. */ @@ -16017,23 +16899,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode) default: gcc_unreachable (); } + emit_insn (cmp); /* OR them together. */ or_result = gen_reg_rtx (CCFPmode); cmp = gen_e500_cr_ior_compare (or_result, compare_result, - compare_result2); + compare_result2); compare_result = or_result; - code = EQ; - } - else - { - if (code == NE || code == LTGT) - code = NE; - else - code = EQ; } + code = reverse_p ? NE : EQ; + emit_insn (cmp); } else @@ -16913,7 +17790,8 @@ emit_unlikely_jump (rtx cond, rtx label) } /* A subroutine of the atomic operation splitters. Emit a load-locked - instruction in MODE. */ + instruction in MODE. For QI/HImode, possibly use a pattern than includes + the zero_extend operation. 
*/ static void emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) @@ -16922,12 +17800,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) switch (mode) { + case QImode: + fn = gen_load_lockedqi; + break; + case HImode: + fn = gen_load_lockedhi; + break; case SImode: - fn = gen_load_lockedsi; + if (GET_MODE (mem) == QImode) + fn = gen_load_lockedqi_si; + else if (GET_MODE (mem) == HImode) + fn = gen_load_lockedhi_si; + else + fn = gen_load_lockedsi; break; case DImode: fn = gen_load_lockeddi; break; + case TImode: + fn = gen_load_lockedti; + break; default: gcc_unreachable (); } @@ -16944,12 +17836,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) switch (mode) { + case QImode: + fn = gen_store_conditionalqi; + break; + case HImode: + fn = gen_store_conditionalhi; + break; case SImode: fn = gen_store_conditionalsi; break; case DImode: fn = gen_store_conditionaldi; break; + case TImode: + fn = gen_store_conditionalti; + break; default: gcc_unreachable (); } @@ -17046,8 +17947,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) shift = gen_reg_rtx (SImode); addr = gen_lowpart (SImode, addr); emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask))); - shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), - shift, 1, OPTAB_LIB_WIDEN); + if (WORDS_BIG_ENDIAN) + shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), + shift, 1, OPTAB_LIB_WIDEN); *pshift = shift; /* Mask for insertion. */ @@ -17095,7 +17997,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) { rtx boolval, retval, mem, oldval, newval, cond; rtx label1, label2, x, mask, shift; - enum machine_mode mode; + enum machine_mode mode, orig_mode; enum memmodel mod_s, mod_f; bool is_weak; @@ -17107,22 +18009,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) is_weak = (INTVAL (operands[5]) != 0); mod_s = (enum memmodel) INTVAL (operands[6]); mod_f = (enum memmodel) INTVAL (operands[7]); - mode = GET_MODE (mem); + orig_mode = mode = GET_MODE (mem); mask = shift = NULL_RTX; if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask OLDVAL into position with the word. */ + /* Before power8, we didn't have access to lbarx/lharx, so generate a + lwarx and shift/mask operations. With power8, we need to do the + comparison in SImode, but the store is still done in QI/HImode. */ oldval = convert_modes (SImode, mode, oldval, 1); - oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); - /* Shift and mask NEWVAL into position within the word. */ - newval = convert_modes (SImode, mode, newval, 1); - newval = expand_simple_binop (SImode, ASHIFT, newval, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (!TARGET_SYNC_HI_QI) + { + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); + + /* Shift and mask OLDVAL into position with the word. */ + oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + + /* Shift and mask NEWVAL into position within the word. */ + newval = convert_modes (SImode, mode, newval, 1); + newval = expand_simple_binop (SImode, ASHIFT, newval, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); + } /* Prepare to adjust the return value. 
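The new TImode variants of emit_load_locked and emit_store_conditional above back 128-bit atomic sequences using the quad-word reservation instructions (lqarx/stqcx.). A usage-level illustration, not from the patch; whether it expands inline or falls back to a library call depends on the selected CPU and options:

#include <stdbool.h>

/* 128-bit compare-and-swap.  */
bool
cas_u128 (unsigned __int128 *p, unsigned __int128 *expected,
          unsigned __int128 desired)
{
  return __atomic_compare_exchange_n (p, expected, desired, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}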
*/ retval = gen_reg_rtx (SImode); @@ -17151,7 +18060,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) } cond = gen_reg_rtx (CCmode); - x = gen_rtx_COMPARE (CCmode, x, oldval); + /* If we have TImode, synthesize a comparison. */ + if (mode != TImode) + x = gen_rtx_COMPARE (CCmode, x, oldval); + else + { + rtx xor1_result = gen_reg_rtx (DImode); + rtx xor2_result = gen_reg_rtx (DImode); + rtx or_result = gen_reg_rtx (DImode); + rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); + rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); + rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); + rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); + + emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); + emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); + emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); + x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); + } + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); @@ -17161,7 +18088,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (mask) x = rs6000_mask_atomic_subword (retval, newval, mask); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (orig_mode, cond, mem, x); if (!is_weak) { @@ -17179,6 +18106,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[]) if (shift) rs6000_finish_atomic_subword (operands[1], retval, shift); + else if (mode != GET_MODE (operands[1])) + convert_move (operands[1], retval, 1); /* In all cases, CR0 contains EQ on success, and NE on failure. */ x = gen_rtx_EQ (SImode, cond, const0_rtx); @@ -17202,7 +18131,7 @@ rs6000_expand_atomic_exchange (rtx operands[]) mode = GET_MODE (mem); mask = shift = NULL_RTX; - if (mode == QImode || mode == HImode) + if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) { mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); @@ -17251,53 +18180,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, { enum memmodel model = (enum memmodel) INTVAL (model_rtx); enum machine_mode mode = GET_MODE (mem); + enum machine_mode store_mode = mode; rtx label, x, cond, mask, shift; rtx before = orig_before, after = orig_after; mask = shift = NULL_RTX; + /* On power8, we want to use SImode for the operation. On previous systems, + use the operation in a subword and shift/mask to get the proper byte or + halfword. */ if (mode == QImode || mode == HImode) { - mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - - /* Shift and mask VAL into position with the word. */ - val = convert_modes (SImode, mode, val, 1); - val = expand_simple_binop (SImode, ASHIFT, val, shift, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (TARGET_SYNC_HI_QI) + { + val = convert_modes (SImode, mode, val, 1); - switch (code) + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + mode = SImode; + } + else { - case IOR: - case XOR: - /* We've already zero-extended VAL. That is sufficient to - make certain that it does not affect other bits. */ - mask = NULL; - break; + mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); - case AND: - /* If we make certain that all of the other bits in VAL are - set, that will be sufficient to not affect other bits. */ - x = gen_rtx_NOT (SImode, mask); - x = gen_rtx_IOR (SImode, x, val); - emit_insn (gen_rtx_SET (VOIDmode, val, x)); - mask = NULL; - break; + /* Shift and mask VAL into position with the word. 
*/ + val = convert_modes (SImode, mode, val, 1); + val = expand_simple_binop (SImode, ASHIFT, val, shift, + NULL_RTX, 1, OPTAB_LIB_WIDEN); - case NOT: - case PLUS: - case MINUS: - /* These will all affect bits outside the field and need - adjustment via MASK within the loop. */ - break; + switch (code) + { + case IOR: + case XOR: + /* We've already zero-extended VAL. That is sufficient to + make certain that it does not affect other bits. */ + mask = NULL; + break; - default: - gcc_unreachable (); - } + case AND: + /* If we make certain that all of the other bits in VAL are + set, that will be sufficient to not affect other bits. */ + x = gen_rtx_NOT (SImode, mask); + x = gen_rtx_IOR (SImode, x, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + mask = NULL; + break; - /* Prepare to adjust the return value. */ - before = gen_reg_rtx (SImode); - if (after) - after = gen_reg_rtx (SImode); - mode = SImode; + case NOT: + case PLUS: + case MINUS: + /* These will all affect bits outside the field and need + adjustment via MASK within the loop. */ + break; + + default: + gcc_unreachable (); + } + + /* Prepare to adjust the return value. */ + before = gen_reg_rtx (SImode); + if (after) + after = gen_reg_rtx (SImode); + store_mode = mode = SImode; + } } mem = rs6000_pre_atomic_barrier (mem, model); @@ -17330,9 +18276,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, NULL_RTX, 1, OPTAB_LIB_WIDEN); x = rs6000_mask_atomic_subword (before, x, mask); } + else if (store_mode != mode) + x = convert_modes (store_mode, mode, x, 1); cond = gen_reg_rtx (CCmode); - emit_store_conditional (mode, cond, mem, x); + emit_store_conditional (store_mode, cond, mem, x); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); emit_unlikely_jump (x, label); @@ -17341,11 +18289,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, if (shift) { + /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and + then do the calcuations in a SImode register. */ if (orig_before) rs6000_finish_atomic_subword (orig_before, before, shift); if (orig_after) rs6000_finish_atomic_subword (orig_after, after, shift); } + else if (store_mode != mode) + { + /* QImode/HImode on machines with lbarx/lharx where we do the native + operation and then do the calcuations in a SImode register. 
*/ + if (orig_before) + convert_move (orig_before, before, 1); + if (orig_after) + convert_move (orig_after, after, 1); + } else if (orig_after && after != orig_after) emit_move_insn (orig_after, after); } @@ -22360,7 +23319,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode) fputs (DOUBLE_INT_ASM_OP, file); else fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); - fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + if (WORDS_BIG_ENDIAN) + fprintf (file, "0x%lx00000000\n", l & 0xffffffff); + else + fprintf (file, "0x%lx\n", l & 0xffffffff); return; } else @@ -22951,6 +23913,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) || rs6000_cpu_attr == CPU_POWER4 || rs6000_cpu_attr == CPU_POWER5 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_CELL) && recog_memoized (dep_insn) && (INSN_CODE (dep_insn) >= 0)) @@ -23233,7 +24196,8 @@ is_microcoded_insn (rtx insn) if (rs6000_cpu_attr == CPU_CELL) return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; - if (rs6000_sched_groups) + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) { enum attr_type type = get_attr_type (insn); if (type == TYPE_LOAD_EXT_U @@ -23258,7 +24222,8 @@ is_cracked_insn (rtx insn) || GET_CODE (PATTERN (insn)) == CLOBBER) return false; - if (rs6000_sched_groups) + if (rs6000_sched_groups + && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) { enum attr_type type = get_attr_type (insn); if (type == TYPE_LOAD_U || type == TYPE_STORE_U @@ -23537,6 +24502,8 @@ rs6000_issue_rate (void) case CPU_POWER6: case CPU_POWER7: return 5; + case CPU_POWER8: + return 7; default: return 1; } @@ -24164,6 +25131,39 @@ insn_must_be_first_in_group (rtx insn) break; } break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_COMPARE: + case TYPE_DELAYED_COMPARE: + case TYPE_VAR_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_SYNC: + case TYPE_ISYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_UX: + case TYPE_VECSTORE: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + return true; + default: + break; + } + break; default: break; } @@ -24242,6 +25242,25 @@ insn_must_be_last_in_group (rtx insn) break; } break; + case PROCESSOR_POWER8: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_UX: + return true; + default: + break; + } + break; default: break; } @@ -24331,8 +25350,9 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, if (can_issue_more && !is_branch_slot_insn (next_insn)) can_issue_more--; - /* Power6 and Power7 have special group ending nop. */ - if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7) + /* Do we have a special group ending nop? 
*/ + if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7 + || rs6000_cpu_attr == CPU_POWER8) { nop = gen_group_ending_nop (); emit_insn_before (nop, next_insn); @@ -26513,7 +27533,8 @@ rs6000_register_move_cost (enum machine_mode mode, /* For those processors that have slow LR/CTR moves, make them more expensive than memory in order to bias spills to memory .*/ else if ((rs6000_cpu == PROCESSOR_POWER6 - || rs6000_cpu == PROCESSOR_POWER7) + || rs6000_cpu == PROCESSOR_POWER7 + || rs6000_cpu == PROCESSOR_POWER8) && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS)) ret = 6 * hard_regno_nregs[0][mode]; @@ -26990,26 +28011,31 @@ bool altivec_expand_vec_perm_const (rtx operands[4]) { struct altivec_perm_insn { + HOST_WIDE_INT mask; enum insn_code impl; unsigned char perm[16]; }; static const struct altivec_perm_insn patterns[] = { - { CODE_FOR_altivec_vpkuhum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum, { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, - { CODE_FOR_altivec_vpkuwum, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum, { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, - { CODE_FOR_altivec_vmrghb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb, { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, - { CODE_FOR_altivec_vmrghh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh, { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, - { CODE_FOR_altivec_vmrghw, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw, { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, - { CODE_FOR_altivec_vmrglb, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb, { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, - { CODE_FOR_altivec_vmrglh, + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh, { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, - { CODE_FOR_altivec_vmrglw, - { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } } + { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew, + { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, + { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow, + { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } }; unsigned int i, j, elt, which; @@ -27109,6 +28135,9 @@ altivec_expand_vec_perm_const (rtx operands[4]) { bool swapped; + if ((patterns[j].mask & rs6000_isa_flags) == 0) + continue; + elt = patterns[j].perm[0]; if (perm[0] == elt) swapped = false; @@ -27742,6 +28771,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { { "altivec", OPTION_MASK_ALTIVEC, false, true }, { "cmpb", OPTION_MASK_CMPB, false, true }, + { "crypto", OPTION_MASK_CRYPTO, false, true }, + { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, @@ -27750,13 +28781,17 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, { "mulhw", OPTION_MASK_MULHW, false, true }, { "multiple", OPTION_MASK_MULTIPLE, false, true }, - { "update", OPTION_MASK_NO_UPDATE, true , true }, { "popcntb", OPTION_MASK_POPCNTB, false, true }, { "popcntd", OPTION_MASK_POPCNTD, false, true }, + { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, + { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, + { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, { 
"powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, + { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, { "string", OPTION_MASK_STRING, false, true }, + { "update", OPTION_MASK_NO_UPDATE, true , true }, { "vsx", OPTION_MASK_VSX, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT @@ -27798,6 +28833,8 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, { "popcntd", RS6000_BTM_POPCNTD, false, false }, { "cell", RS6000_BTM_CELL, false, false }, + { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, + { "crypto", RS6000_BTM_CRYPTO, false, false }, }; /* Option variables that we want to support inside attribute((target)) and diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 6549347b9b7..633d7891157 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -92,7 +92,7 @@ #ifdef HAVE_AS_POWER8 #define ASM_CPU_POWER8_SPEC "-mpower8" #else -#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec" +#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC #endif #ifdef HAVE_AS_DCI @@ -164,6 +164,7 @@ %{mcpu=e6500: -me6500} \ %{maltivec: -maltivec} \ %{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \ +%{mpower8-vector|mcrypto|mdirect-move: %{!mcpu*: %(asm_cpu_power8)}} \ -many" #define CPP_DEFAULT_SPEC "" @@ -277,6 +278,19 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_POPCNTD 0 #endif +/* Define the ISA 2.07 flags as 0 if the target assembler does not support the + waitasecond instruction. Allow -mpower8-fusion, since it does not add new + instructions. */ + +#ifndef HAVE_AS_POWER8 +#undef TARGET_DIRECT_MOVE +#undef TARGET_CRYPTO +#undef TARGET_P8_VECTOR +#define TARGET_DIRECT_MOVE 0 +#define TARGET_CRYPTO 0 +#define TARGET_P8_VECTOR 0 +#endif + /* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If not, generate the lwsync code as an integer constant. */ #ifdef HAVE_AS_LWSYNC @@ -386,6 +400,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET) #define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN) +/* Describe the vector unit used for arithmetic operations. */ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_NONE_P(MODE) \ @@ -394,12 +409,25 @@ extern enum rs6000_vector rs6000_vector_unit[]; #define VECTOR_UNIT_VSX_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_VSX) +#define VECTOR_UNIT_P8_VECTOR_P(MODE) \ + (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR) + #define VECTOR_UNIT_ALTIVEC_P(MODE) \ (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + +/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either + altivec (VMX) or VSX vector instructions. P8 vector support is upwards + compatible, so allow it as well, rather than changing all of the uses of the + macro. */ #define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_unit[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Describe whether to use VSX loads or Altivec loads. 
For now, just use the same unit as the vector unit we are using, but we may want to migrate to @@ -412,12 +440,21 @@ extern enum rs6000_vector rs6000_vector_mem[]; #define VECTOR_MEM_VSX_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_VSX) +#define VECTOR_MEM_P8_VECTOR_P(MODE) \ + (rs6000_vector_mem[(MODE)] == VECTOR_VSX) + #define VECTOR_MEM_ALTIVEC_P(MODE) \ (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC) +#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \ + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_VSX, \ + (int)VECTOR_P8_VECTOR)) + #define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \ - (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \ - || rs6000_vector_mem[(MODE)] == VECTOR_VSX) + (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \ + (int)VECTOR_ALTIVEC, \ + (int)VECTOR_P8_VECTOR)) /* Return the alignment of a given vector type, which is set based on the vector unit use. VSX for instance can load 32 or 64 bit aligned words @@ -479,6 +516,15 @@ extern int rs6000_vector_align[]; #define TARGET_FCTIDUZ TARGET_POPCNTD #define TARGET_FCTIWUZ TARGET_POPCNTD +#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) + +/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present + in power7, so conditionalize them on p8 features. TImode syncs need quad + memory support. */ +#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE) +#define TARGET_SYNC_TI TARGET_QUAD_MEMORY + /* Power7 has both 32-bit load and store integer for the FPRs, so we don't need to allocate the SDmode stack slot to get the value into the proper location in the register. */ @@ -489,10 +535,13 @@ extern int rs6000_vector_align[]; OPTION_MASK_<xxx> back into MASK_<xxx>. */ #define MASK_ALTIVEC OPTION_MASK_ALTIVEC #define MASK_CMPB OPTION_MASK_CMPB +#define MASK_CRYPTO OPTION_MASK_CRYPTO #define MASK_DFP OPTION_MASK_DFP +#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE #define MASK_DLMZB OPTION_MASK_DLMZB #define MASK_EABI OPTION_MASK_EABI #define MASK_FPRND OPTION_MASK_FPRND +#define MASK_P8_FUSION OPTION_MASK_P8_FUSION #define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT #define MASK_ISEL OPTION_MASK_ISEL #define MASK_MFCRF OPTION_MASK_MFCRF @@ -500,6 +549,7 @@ extern int rs6000_vector_align[]; #define MASK_MULHW OPTION_MASK_MULHW #define MASK_MULTIPLE OPTION_MASK_MULTIPLE #define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE +#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR #define MASK_POPCNTB OPTION_MASK_POPCNTB #define MASK_POPCNTD OPTION_MASK_POPCNTD #define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT @@ -665,6 +715,11 @@ extern unsigned char rs6000_recip_bits[]; instructions for them. Might as well be consistent with bits and bytes. */ #define WORDS_BIG_ENDIAN 1 +/* This says that for the IBM long double the larger magnitude double + comes first. It's really a two element double array, and arrays + don't index differently between little- and big-endian. */ +#define LONG_DOUBLE_LARGE_FIRST 1 + #define MAX_BITS_PER_WORD 64 /* Width of a word, in units (bytes). */ @@ -758,12 +813,6 @@ extern unsigned rs6000_pointer_size; /* No data type wants to be aligned rounder than this. */ #define BIGGEST_ALIGNMENT 128 -/* A C expression to compute the alignment for a variables in the - local store. TYPE is the data type, and ALIGN is the alignment - that the object would ordinarily have. */ -#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ - DATA_ALIGNMENT (TYPE, ALIGN) - /* Alignment of field after `int : 0' in a structure. 
*/ #define EMPTY_FIELD_BOUNDARY 32 @@ -773,8 +822,15 @@ extern unsigned rs6000_pointer_size; /* A bit-field declared as `int' forces `int' alignment for the struct. */ #define PCC_BITFIELD_TYPE_MATTERS 1 -/* Make strings word-aligned so strcpy from constants will be faster. - Make vector constants quadword aligned. */ +enum data_align { align_abi, align_opt, align_both }; + +/* A C expression to compute the alignment for a variables in the + local store. TYPE is the data type, and ALIGN is the alignment + that the object would ordinarily have. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_both) + +/* Make strings word-aligned so strcpy from constants will be faster. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) \ (TREE_CODE (EXP) == STRING_CST \ && (STRICT_ALIGNMENT || !optimize_size) \ @@ -782,21 +838,14 @@ extern unsigned rs6000_pointer_size; ? BITS_PER_WORD \ : (ALIGN)) -/* Make arrays of chars word-aligned for the same reasons. - Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_opt) + +/* Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to 64 bits. */ -#define DATA_ALIGNMENT(TYPE, ALIGN) \ - (TREE_CODE (TYPE) == VECTOR_TYPE \ - ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \ - || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \ - ? 64 : 128) \ - : ((TARGET_E500_DOUBLE \ - && TREE_CODE (TYPE) == REAL_TYPE \ - && TYPE_MODE (TYPE) == DFmode) \ - ? 64 \ - : (TREE_CODE (TYPE) == ARRAY_TYPE \ - && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ - && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN))) +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + rs6000_data_alignment (TYPE, ALIGN, align_abi) /* Nonzero if move instructions will actually fail to work when given unaligned data. */ @@ -1002,7 +1051,9 @@ extern unsigned rs6000_pointer_size; #define REG_ALLOC_ORDER \ {32, \ - 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \ + /* move fr13 (ie 45) later, so if we need TFmode, it does */ \ + /* not use fr14 which is a saved register. */ \ + 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \ 33, \ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 50, 49, 48, 47, 46, \ @@ -1062,8 +1113,14 @@ extern unsigned rs6000_pointer_size; #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N) /* Alternate name for any vector register supporting logical operations, no - matter which instruction set(s) are available. */ -#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N) + matter which instruction set(s) are available. For 64-bit mode, we also + allow logical operations in the GPRS. This is to allow atomic quad word + builtins not to need the VSX registers for lqarx/stqcx. It also helps with + __int128_t arguments that are passed in GPRs. */ +#define VLOGICAL_REGNO_P(N) \ + (ALTIVEC_REGNO_P (N) \ + || (TARGET_VSX && FP_REGNO_P (N)) \ + || (TARGET_VSX && TARGET_POWERPC64 && INT_REGNO_P (N))) /* Return number of consecutive hard regs needed starting at reg REGNO to hold something of mode MODE. */ @@ -1124,7 +1181,7 @@ extern unsigned rs6000_pointer_size; when one has mode MODE1 and one has mode MODE2. If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, for any hard reg, then this must be 0 for correct output. */ -#define MODES_TIEABLE_P(MODE1, MODE2) \ +#define MODES_TIEABLE_P(MODE1, MODE2) \ (SCALAR_FLOAT_MODE_P (MODE1) \ ? 
SCALAR_FLOAT_MODE_P (MODE2) \ : SCALAR_FLOAT_MODE_P (MODE2) \ @@ -1137,14 +1194,14 @@ extern unsigned rs6000_pointer_size; ? SPE_VECTOR_MODE (MODE2) \ : SPE_VECTOR_MODE (MODE2) \ ? SPE_VECTOR_MODE (MODE1) \ - : ALTIVEC_VECTOR_MODE (MODE1) \ - ? ALTIVEC_VECTOR_MODE (MODE2) \ - : ALTIVEC_VECTOR_MODE (MODE2) \ - ? ALTIVEC_VECTOR_MODE (MODE1) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \ + : ALTIVEC_VECTOR_MODE (MODE1) \ + ? ALTIVEC_VECTOR_MODE (MODE2) \ + : ALTIVEC_VECTOR_MODE (MODE2) \ + ? ALTIVEC_VECTOR_MODE (MODE1) \ : 1) /* Post-reload, we can't use any new AltiVec registers, as we already @@ -1337,8 +1394,11 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */ RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */ + RS6000_CONSTRAINT_wm, /* VSX register for direct move */ + RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_ws, /* VSX register for DF */ RS6000_CONSTRAINT_wt, /* VSX register for TImode */ + RS6000_CONSTRAINT_wv, /* Altivec register for power8 vector */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */ RS6000_CONSTRAINT_MAX @@ -2297,6 +2357,13 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */ /* How to align the given loop. */ #define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL) +/* Alignment guaranteed by __builtin_malloc. */ +/* FIXME: 128-bit alignment is guaranteed by glibc for TARGET_64BIT. + However, specifying the stronger guarantee currently leads to + a regression in SPEC CPU2006 437.leslie3d. The stronger + guarantee should be implemented here once that's fixed. */ +#define MALLOC_ABI_ALIGNMENT (64) + /* Pick up the return address upon entry to a procedure. Used for dwarf2 unwind information. This also enables the table driven mechanism. */ @@ -2365,6 +2432,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_ALWAYS 0 /* Always enabled. */ #define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ #define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ +#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */ +#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */ #define RS6000_BTM_SPE MASK_STRING /* E500 */ #define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */ #define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */ @@ -2376,6 +2445,8 @@ extern int frame_pointer_needed; #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ + | RS6000_BTM_P8_VECTOR \ + | RS6000_BTM_CRYPTO \ | RS6000_BTM_FRE \ | RS6000_BTM_FRES \ | RS6000_BTM_FRSQRTE \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 1e65ac1cde0..010e21f7413 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -127,6 +127,13 @@ UNSPEC_LFIWZX UNSPEC_FCTIWUZ UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_FUSION_GPR ]) ;; @@ -146,7 +153,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. 
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md @@ -166,9 +173,14 @@ (const_int 4))) ;; Processor type -- this attribute must exactly match the processor_type -;; enumeration in rs6000.h. - -(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan" +;; enumeration in rs6000-opts.h. +(define_attr "cpu" + "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630, + ppc750,ppc7400,ppc7450, + ppc403,ppc405,ppc440,ppc476, + ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500, + power4,power5,power6,power7,power8, + rs64a,mpccore,cell,ppca2,titan" (const (symbol_ref "rs6000_cpu_attr"))) @@ -201,6 +213,7 @@ (include "power5.md") (include "power6.md") (include "power7.md") +(include "power8.md") (include "cell.md") (include "xfpu.md") (include "a2.md") @@ -227,6 +240,12 @@ ; extend modes for DImode (define_mode_iterator QHSI [QI HI SI]) +; QImode or HImode for small atomic ops +(define_mode_iterator QHI [QI HI]) + +; HImode or SImode for sign extended fusion ops +(define_mode_iterator HSI [HI SI]) + ; SImode or DImode, even if DImode doesn't fit in GPRs. 
(define_mode_iterator SDI [SI DI]) @@ -268,6 +287,15 @@ (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "")]) + ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") (DF "") @@ -284,11 +312,16 @@ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) +(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) + +; Definitions for 32-bit fpr direct move +(define_mode_attr f32_dm [(SF "wn") (SD "wm")]) ; These modes do not fit in integer registers in 32-bit mode. ; but on e500v2, the gpr are 64 bit registers @@ -368,7 +401,7 @@ (define_insn "*zero_extend<mode>di2_internal1" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)" "@ l<wd>z%U1%X1 %0,%1 rldicl %0,%1,0,<dbits>" @@ -434,6 +467,29 @@ (const_int 0)))] "") +(define_insn "*zero_extendsidi2_lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWZX" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + mtvsrwz %x0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_u") + (const_string "load"))) + (const_string "*") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + (define_insn "extendqidi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -581,10 +637,33 @@ "TARGET_POWERPC64" "") -(define_insn "" +(define_insn "*extendsidi2_lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWAX" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + mtvsrwa %x0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_u") + (const_string "load_ext"))) + (const_string "exts") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] - "TARGET_POWERPC64 && rs6000_gen_cell_microcode" + "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 extsw %0,%1" @@ -598,7 +677,7 @@ (const_string "load_ext"))) (const_string "exts")])]) 
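As a rough illustration of what the new sign- and zero-extension alternatives above buy (the *extendsidi2_lfiwax and *zero_extendsidi2_lfiwzx patterns with their lfiwax/lxsiwax, lfiwzx/lxsiwzx and mtvsrwa/mtvsrwz alternatives), here is a hypothetical C fragment, not part of the patch, whose widened values feed floating-point code and can therefore stay in FPR/VSX registers instead of bouncing through the stack:

/* Hypothetical illustration, not part of this patch: SImode -> DImode
   extensions that the new *extendsidi2_lfiwax / *zero_extendsidi2_lfiwzx
   alternatives can keep in FPR/VSX registers when the result feeds a
   floating-point conversion.  */
double
widen_and_convert (int *p, unsigned int *q)
{
  long long s = *p;            /* sign_extend:DI of an SImode load  */
  unsigned long long z = *q;   /* zero_extend:DI of an SImode load  */
  return (double) s + (double) z;
}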
-(define_insn "" +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64 && !rs6000_gen_cell_microcode" @@ -2035,7 +2114,9 @@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] "TARGET_CMPB && TARGET_POPCNTB" - "prty<wd> %0,%1") + "prty<wd> %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "parity<mode>2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -4316,7 +4397,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -4348,7 +4429,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -5104,6 +5185,41 @@ "frsqrtes %0,%1" [(set_attr "type" "fp")]) +;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in +;; builtins.c and optabs.c that are not correct for IBM long double +;; when little-endian. +(define_expand "signbittf2" + [(set (match_dup 2) + (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (set (match_dup 3) + (subreg:DI (match_dup 2) 0)) + (set (match_dup 4) + (match_dup 5)) + (set (match_operand:SI 0 "gpc_reg_operand" "") + (match_dup 6))] + "!TARGET_IEEEQUAD + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && TARGET_LONG_DOUBLE_128" +{ + operands[2] = gen_reg_rtx (DFmode); + operands[3] = gen_reg_rtx (DImode); + if (TARGET_POWERPC64) + { + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63)); + operands[6] = gen_rtx_SUBREG (SImode, operands[4], + WORDS_BIG_ENDIAN ? 4 : 0); + } + else + { + operands[4] = gen_reg_rtx (SImode); + operands[5] = gen_rtx_SUBREG (SImode, operands[3], + WORDS_BIG_ENDIAN ? 0 : 4); + operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31)); + } +}) + (define_expand "copysign<mode>3" [(set (match_dup 3) (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) @@ -5553,12 +5669,15 @@ ; We don't define lfiwax/lfiwzx with the normal definition, because we ; don't want to support putting SImode in FPR registers. (define_insn "lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWAX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" - "lfiwax %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1" + [(set_attr "type" "fpload,fpload,mffgpr")]) ; This split must be run before register allocation because it allocates the ; memory slot that is needed to move values to/from the FPR. 
We don't allocate @@ -5580,7 +5699,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, false); else { @@ -5629,12 +5749,15 @@ (set_attr "type" "fpload")]) (define_insn "lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWZX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" - "lfiwzx %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1" + [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") @@ -5651,7 +5774,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, true); else { @@ -5942,7 +6066,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -6036,7 +6160,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -8285,6 +8409,18 @@ (compare:CC (match_dup 0) (const_int 0)))] "") + +;; Eqv operation. +(define_insn "*eqv<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (not:GPR + (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] + "" + "eqv %0,%1,%2" + [(set_attr "type" "integer") + (set_attr "length" "4")]) + ;; Now define ways of moving data around. @@ -8490,7 +8626,7 @@ cmp<wd>i %2,%0,0 mr. 
%0,%1 #" - [(set_attr "type" "cmp,compare,cmp") + [(set_attr "type" "cmp,fast_compare,cmp") (set_attr "length" "4,4,8")]) (define_split @@ -8680,8 +8816,8 @@ }") (define_insn "mov<mode>_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -8694,6 +8830,10 @@ xxlxor %x0,%x0,%x0 <f32_li> <f32_si> + <f32_lv> + <f32_sv> + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 mt%0 %1 mf%1 %0 nop @@ -8732,16 +8872,20 @@ (match_test "update_address_mem (operands[0], VOIDmode)") (const_string "fpstore_u") (const_string "fpstore"))) + (const_string "fpload") + (const_string "fpstore") + (const_string "mftgpr") + (const_string "mffgpr") (const_string "mtjmpr") (const_string "mfjmpr") (const_string "*") (const_string "*") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")]) (define_insn "*mov<mode>_softfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") - (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))] + (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" @@ -8954,8 +9098,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov<mode>_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -8980,7 +9124,9 @@ # # mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9038,8 +9184,10 @@ (const_string "*") (const_string "*") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov<mode>_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") @@ -9154,8 +9302,8 @@ "&& reload_completed" [(pc)] { - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 
0 : GET_MODE_SIZE (DFmode); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word), operands[1]); emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word), @@ -9384,8 +9532,8 @@ && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -9419,6 +9567,216 @@ }) +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is setup for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependancy between the two, but we could potentially +;; schedule other instructions between the two instructions. TFmode is +;; currently limited to traditional FPR registers. If/when this is changed, we +;; will need to revist %L to make sure it works with VSX registers, or add an +;; %x version of %L. + +(define_insn "p8_fmrgow_<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%L1" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_mtvsrwz_1" + [(set (match_operand:TF 0 "register_operand" "=d") + (unspec:TF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrwz_2" + [(set (match_operand:TF 0 "register_operand" "+d") + (unspec:TF [(match_dup 0) + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr<mode>" + [(set (match_operand:FMOVE64X 0 "register_operand" "=ws") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_<mode> (dest, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_1" + [(set (match_operand:TF 0 "register_operand" "=ws") + (unspec:TF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrd_2" + [(set (match_operand:TF 0 "register_operand" "+ws") + (unspec:TF [(match_dup 0) + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && 
TARGET_DIRECT_MOVE" + "mtvsrd %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_xxpermdi_<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")] + UNSPEC_P8V_XXPERMDI))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "xxpermdi %x0,%1,%L1,0" + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "reload_vsx_from_gpr<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=ws"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DImode, src); + rtx gpr_lo_reg = gen_lowpart (DImode, src); + + emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a VSX from a GPR register. Because scalar floating point +;; type is stored internally as double precision in the VSX registers, we have +;; to convert it from the vector format. + +(define_insn_and_split "reload_vsx_from_gprsf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0); + rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_move_insn (op0_di, op2); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "two")]) + +;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a +;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value, +;; and then doing a move of that. +(define_insn "p8_mfvsrd_3_<mode>" + [(set (match_operand:DF 0 "register_operand" "=r") + (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_gpr_from_vsx<mode>" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DFmode, dest); + rtx gpr_lo_reg = gen_lowpart (DFmode, dest); + + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src)); + emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3))); + emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a GPR from a VSX register. 
Because scalar floating point +;; type is stored internally as double precision, we have to convert it to the +;; vector format. + +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Next come the multi-word integer load and store and the load and store ;; multiple insns. @@ -9467,7 +9825,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "") (match_operand:DI 1 "const_int_operand" ""))] "! TARGET_POWERPC64 && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 1))] " @@ -9485,13 +9844,14 @@ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") (match_operand:DIFD 1 "input_operand" ""))] "reload_completed && !TARGET_POWERPC64 - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -9513,7 +9873,9 @@ nop xxlxor %x0,%x0,%x0 mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9562,8 +9924,10 @@ (const_string "*") (const_string "vecsimple") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
@@ -9652,19 +10016,23 @@ (const_string "conditional")))]) (define_insn "*mov<mode>_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r"))] - "(TARGET_POWERPC64 - && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode)) + [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") + (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)))" - "#" - [(set_attr "type" "store,load,*")]) +{ + return rs6000_output_move_128bit (operands); +} + [(set_attr "type" "store,load,*,*") + (set_attr "length" "8")]) (define_split - [(set (match_operand:TI2 0 "gpc_reg_operand" "") + [(set (match_operand:TI2 0 "int_reg_operand" "") (match_operand:TI2 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (<MODE>mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9691,7 +10059,9 @@ [(set (match_operand:TI2 0 "nonimmediate_operand" "") (match_operand:TI2 1 "input_operand" ""))] "reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12554,8 +12924,8 @@ (match_dup 13)] { REAL_VALUE_TYPE rv; - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word); @@ -14788,7 +15158,7 @@ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))] "TARGET_POPCNTD" "bpermd %0,%1,%2" - [(set_attr "type" "integer")]) + [(set_attr "type" "popcnt")]) ;; Builtin fma support. Handle @@ -14931,3 +15301,4 @@ (include "spe.md") (include "dfp.md") (include "paired.md") +(include "crypto.md") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 17b77629fa1..9a078198130 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -517,4 +517,28 @@ Control whether we save the TOC in the prologue for indirect calls or generate t mvsx-timode Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags) -; Allow/disallow TImode in VSX registers +Allow 128-bit integers in VSX registers + +mpower8-fusion +Target Report Mask(P8_FUSION) Var(rs6000_isa_flags) +Fuse certain integer operations together for better performance on power8 + +mpower8-fusion-sign +Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags) +Allow sign extension in fusion operations + +mpower8-vector +Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) +Use/do not use vector and scalar instructions added in ISA 2.07. + +mcrypto +Target Report Mask(CRYPTO) Var(rs6000_isa_flags) +Use ISA 2.07 crypto instructions + +mdirect-move +Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags) +Use ISA 2.07 direct move between GPR & VSX register instructions + +mquad-memory +Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags) +Generate the quad word memory instructions (lq/stq/lqarx/stqcx). 
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md index cec2b430b82..bf10a5dc180 100644 --- a/gcc/config/rs6000/spe.md +++ b/gcc/config/rs6000/spe.md @@ -2604,8 +2604,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); @@ -2627,8 +2627,8 @@ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128" " { - const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode); - const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0; + const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode); + const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0; operands[3] = gen_reg_rtx (DFmode); operands[4] = gen_reg_rtx (CCFPmode); operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md index 252e2690a98..8616b3eca5f 100644 --- a/gcc/config/rs6000/sync.md +++ b/gcc/config/rs6000/sync.md @@ -18,14 +18,23 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_mode_attr larx [(SI "lwarx") (DI "ldarx")]) -(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")]) +(define_mode_attr larx [(QI "lbarx") + (HI "lharx") + (SI "lwarx") + (DI "ldarx") + (TI "lqarx")]) + +(define_mode_attr stcx [(QI "stbcx.") + (HI "sthcx.") + (SI "stwcx.") + (DI "stdcx.") + (TI "stqcx.")]) (define_code_iterator FETCHOP [plus minus ior xor and]) (define_code_attr fetchop_name [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) (define_code_attr fetchop_pred - [(plus "add_operand") (minus "gpc_reg_operand") + [(plus "add_operand") (minus "int_reg_operand") (ior "logical_operand") (xor "logical_operand") (and "and_operand")]) (define_expand "mem_thread_fence" @@ -129,16 +138,7 @@ case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: case MEMMODEL_SEQ_CST: - if (GET_MODE (operands[0]) == QImode) - emit_insn (gen_loadsync_qi (operands[0])); - else if (GET_MODE (operands[0]) == HImode) - emit_insn (gen_loadsync_hi (operands[0])); - else if (GET_MODE (operands[0]) == SImode) - emit_insn (gen_loadsync_si (operands[0])); - else if (GET_MODE (operands[0]) == DImode) - emit_insn (gen_loadsync_di (operands[0])); - else - gcc_unreachable (); + emit_insn (gen_loadsync_<mode> (operands[0])); break; default: gcc_unreachable (); @@ -170,35 +170,109 @@ DONE; }) -;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve -;; opcode that is "phased-in". Not implemented as of Power7, so not yet used, -;; but let's prepare the macros anyway. +;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instrucitons +;; other than the quad memory operations, which have special restrictions. +;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased +;; in and did not show up until power8. TImode atomic lqarx/stqcx. require +;; special handling due to even/odd register requirements. 
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI") + (HI "TARGET_SYNC_HI_QI") + SI + (DI "TARGET_POWERPC64")]) + +;; Types that we should provide atomic instructions for. -(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")]) +(define_mode_iterator AINT [QI + HI + SI + (DI "TARGET_POWERPC64") + (TI "TARGET_SYNC_TI")]) (define_insn "load_locked<mode>" - [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r") + [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r") (unspec_volatile:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))] "" "<larx> %0,%y1" [(set_attr "type" "load_l")]) +(define_insn "load_locked<QHI:mode>_si" + [(set (match_operand:SI 0 "int_reg_operand" "=r") + (unspec_volatile:SI + [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_HI_QI" + "<QHI:larx> %0,%y1" + [(set_attr "type" "load_l")]) + +;; Use PTImode to get even/odd register pairs +(define_expand "load_lockedti" + [(use (match_operand:TI 0 "quad_int_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" ""))] + "TARGET_SYNC_TI" +{ + /* Use a temporary register to force getting an even register for the + lqarx/stqcrx. instructions. Normal optimizations will eliminate this + extra copy. */ + rtx pti = gen_reg_rtx (PTImode); + emit_insn (gen_load_lockedpti (pti, operands[1])); + emit_move_insn (operands[0], gen_lowpart (TImode, pti)); + DONE; +}) + +(define_insn "load_lockedpti" + [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r") + (unspec_volatile:PTI + [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))] + "TARGET_SYNC_TI + && !reg_mentioned_p (operands[0], operands[1]) + && quad_int_reg_operand (operands[0], PTImode)" + "lqarx %0,%y1" + [(set_attr "type" "load_l")]) + (define_insn "store_conditional<mode>" [(set (match_operand:CC 0 "cc_reg_operand" "=x") (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) (set (match_operand:ATOMIC 1 "memory_operand" "=Z") - (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))] + (match_operand:ATOMIC 2 "int_reg_operand" "r"))] "" "<stcx> %2,%y1" [(set_attr "type" "store_c")]) +(define_expand "store_conditionalti" + [(use (match_operand:CC 0 "cc_reg_operand" "")) + (use (match_operand:TI 1 "memory_operand" "")) + (use (match_operand:TI 2 "quad_int_reg_operand" ""))] + "TARGET_SYNC_TI" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0)); + rtx pti_op2 = gen_reg_rtx (PTImode); + + /* Use a temporary register to force getting an even register for the + lqarx/stqcrx. instructions. Normal optimizations will eliminate this + extra copy. */ + emit_move_insn (pti_op2, gen_lowpart (PTImode, op2)); + emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2)); + DONE; +}) + +(define_insn "store_conditionalpti" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] UNSPECV_SC)) + (set (match_operand:PTI 1 "memory_operand" "=Z") + (match_operand:PTI 2 "quad_int_reg_operand" "r"))] + "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)" + "stqcx. 
%2,%y1" + [(set_attr "type" "store_c")]) + (define_expand "atomic_compare_and_swap<mode>" - [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out - (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out - (match_operand:INT1 2 "memory_operand" "") ;; memory - (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected - (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired + [(match_operand:SI 0 "int_reg_operand" "") ;; bool out + (match_operand:AINT 1 "int_reg_operand" "") ;; val out + (match_operand:AINT 2 "memory_operand" "") ;; memory + (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected + (match_operand:AINT 4 "int_reg_operand" "") ;; desired (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; model succ (match_operand:SI 7 "const_int_operand" "")] ;; model fail @@ -209,9 +283,9 @@ }) (define_expand "atomic_exchange<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; input + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; input (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -220,9 +294,9 @@ }) (define_expand "atomic_<fetchop_name><mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 0) - (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 0) + (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand (match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -232,8 +306,8 @@ }) (define_expand "atomic_nand<mode>" - [(match_operand:INT1 0 "memory_operand" "") ;; memory - (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "memory_operand" "") ;; memory + (match_operand:AINT 1 "int_reg_operand" "") ;; operand (match_operand:SI 2 "const_int_operand" "")] ;; model "" { @@ -243,10 +317,10 @@ }) (define_expand "atomic_fetch_<fetchop_name><mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -256,9 +330,9 @@ }) (define_expand "atomic_fetch_nand<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { @@ -268,10 +342,10 @@ }) (define_expand "atomic_<fetchop_name>_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (FETCHOP:INT1 (match_dup 1) - (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (FETCHOP:AINT (match_dup 1) + (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand (match_operand:SI 3 
"const_int_operand" "")] ;; model "" { @@ -281,9 +355,9 @@ }) (define_expand "atomic_nand_fetch<mode>" - [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output - (match_operand:INT1 1 "memory_operand" "") ;; memory - (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand + [(match_operand:AINT 0 "int_reg_operand" "") ;; output + (match_operand:AINT 1 "memory_operand" "") ;; memory + (match_operand:AINT 2 "int_reg_operand" "") ;; operand (match_operand:SI 3 "const_int_operand" "")] ;; model "" { diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux index 017a293cde3..62a5b941389 100644 --- a/gcc/config/rs6000/t-linux +++ b/gcc/config/rs6000/t-linux @@ -2,7 +2,7 @@ # or soft-float. ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float))) ifneq (,$(findstring spe,$(target))) -MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1) +MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1) else MULTIARCH_DIRNAME = powerpc-linux-gnu endif diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 52c18391556..5889d6d82d4 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -60,6 +60,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/power5.md \ $(srcdir)/config/rs6000/power6.md \ $(srcdir)/config/rs6000/power7.md \ + $(srcdir)/config/rs6000/power8.md \ $(srcdir)/config/rs6000/cell.md \ $(srcdir)/config/rs6000/xfpu.md \ $(srcdir)/config/rs6000/a2.md \ @@ -70,6 +71,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/vector.md \ $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ + $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/spe.md \ $(srcdir)/config/rs6000/dfp.md \ $(srcdir)/config/rs6000/paired.md diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index c1d00ca2a9b..6cfebdeebdc 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -24,13 +24,13 @@ ;; Vector int modes -(define_mode_iterator VEC_I [V16QI V8HI V4SI]) +(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) ;; Vector float modes (define_mode_iterator VEC_F [V4SF V2DF]) ;; Vector arithmetic modes -(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector modes that need alginment via permutes (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) @@ -45,7 +45,7 @@ (define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF]) ;; Vector comparison modes -(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF]) +(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) ;; Vector init/extract modes (define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -126,7 +126,9 @@ (match_operand:VEC_L 1 "input_operand" ""))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); @@ -730,9 +732,10 @@ "") (define_expand "and<mode>3" - [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" "")))] + [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" 
""))) + (clobber (match_scratch:CC 3 ""))])] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -746,8 +749,8 @@ (define_expand "nor<mode>3" [(set (match_operand:VEC_L 0 "vlogical_operand" "") - (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") - (match_operand:VEC_L 2 "vlogical_operand" ""))))] + (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")) + (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && (<MODE>mode != TImode || TARGET_POWERPC64)" "") @@ -760,6 +763,47 @@ && (<MODE>mode != TImode || TARGET_POWERPC64)" "") +;; Power8 vector logical instructions. +(define_expand "eqv<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (not:VEC_L + (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "") + (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Rewrite nand into canonical form +(define_expand "nand<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; The canonical form is to have the negated elment first, so we need to +;; reverse arguments. +(define_expand "orc<mode>3" + [(set (match_operand:VEC_L 0 "register_operand" "") + (ior:VEC_L + (not:VEC_L (match_operand:VEC_L 1 "register_operand" "")) + (match_operand:VEC_L 2 "register_operand" "")))] + "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode) + && (<MODE>mode != TImode || TARGET_POWERPC64)") + +;; Vector count leading zeros +(define_expand "clz<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + +;; Vector population count +(define_expand "popcount<mode>2" + [(set (match_operand:VEC_I 0 "register_operand" "") + (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))] + "TARGET_P8_VECTOR") + + ;; Same size conversions (define_expand "float<VEC_int><mode>2" [(set (match_operand:VEC_F 0 "vfloat_operand" "") @@ -1074,7 +1118,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift left on each vector element @@ -1082,7 +1126,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for logical shift right on each vector element @@ -1090,7 +1134,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Expanders for arithmetic shift right on each vector element @@ -1098,7 +1142,7 @@ [(set (match_operand:VEC_I 0 "vint_operand" "") (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") (match_operand:VEC_I 2 "vint_operand" "")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") ;; Vector reduction expanders for VSX diff --git a/gcc/config/rs6000/vsx.md 
b/gcc/config/rs6000/vsx.md index 4adf6e5ac55..b87da826a95 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -36,6 +36,10 @@ ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +;; Like VSX_L, but don't support TImode for doing logical instructions in +;; 32-bit +(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF]) + ;; Iterator for memory move. Handle TImode specially to allow ;; it to use gprs as well as vsx registers. (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -191,6 +195,8 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVSPDPN + UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP UNSPEC_VSX_CVUXWDP UNSPEC_VSX_CVSXDSP @@ -207,112 +213,31 @@ ;; VSX moves (define_insn "*vsx_mov<mode>" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v") + (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" { - switch (which_alternative) - { - case 0: - case 3: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stx<VSm>x %x1,%y0"; - - case 1: - case 4: - gcc_assert (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) != PRE_INC - && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY); - return "lx<VSm>x %x0,%y1"; - - case 2: - case 5: - return "xxlor %x0,%x1,%x1"; - - case 6: - case 7: - case 8: - case 11: - return "#"; - - case 9: - case 10: - return "xxlxor %x0,%x0,%x0"; - - case 12: - return output_vec_const_move (operands); - - case 13: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stvx %1,%y0"; - - case 14: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "lvx %0,%y1"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal ;; use of TImode is for unions. 
However for plain data movement, slightly ;; favor the vector loads (define_insn "*vsx_movti_64bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r") - (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") + (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" { - switch (which_alternative) - { - case 0: - return "stxvd2x %x1,%y0"; - - case 1: - return "lxvd2x %x0,%y1"; - - case 2: - return "xxlor %x0,%x1,%x1"; - - case 3: - return "xxlxor %x0,%x0,%x0"; - - case 4: - return output_vec_const_move (operands); - - case 5: - return "stvx %1,%y0"; - - case 6: - return "lvx %0,%y1"; - - case 7: - case 8: - case 9: - case 10: - return "#"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*") - (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") + (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) (define_insn "*vsx_movti_32bit" [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r") @@ -1003,6 +928,40 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs +(define_insn "vsx_xscvdpspn" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa") + (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvspdpn" + [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") + (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + +(define_insn "vsx_xscvdpspn_scalar" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] + UNSPEC_VSX_CVDPSPN))] + "TARGET_XSCVDPSPN" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) + +;; Used by direct move to move a SFmode value from GPR to VSX register +(define_insn "vsx_xscvspdpn_directmove" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1088,70 +1047,368 @@ (set_attr "fp_type" "<VSfptype_simple>")]) -;; Logical operations -;; Do not support TImode logical instructions on 32-bit at present, because the -;; compiler will see that we have a TImode and when it wanted DImode, and -;; convert the DImode to TImode, store it on the stack, and load it in a VSX -;; register. 
-(define_insn "*vsx_and<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (and:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +;; Logical operations. Do not support TImode logical instructions on 32-bit at +;; present, because the compiler will see that we have a TImode and when it +;; wanted DImode, and convert the DImode to TImode, store it on the stack, and +;; load it in a VSX register or generate extra logical instructions in GPR +;; registers. + +;; When we are splitting the operations to GPRs, we use three alternatives, two +;; where the first/second inputs and output are in the same register, and the +;; third where the output specifies an early clobber so that we don't have to +;; worry about overlapping registers. + +(define_insn "*vsx_and<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))) + (clobber (match_scratch:CC 3 "X"))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxland %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_ior<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_and<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))) + (clobber (match_scratch:CC 3 "X,X,X,X"))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxland %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6))) + (clobber (match_dup 3))]) + (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9))) + (clobber (match_dup 3))])] +{ + operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_ior<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_ior<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (ior:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && 
VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(const_int 0)] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); -(define_insn "*vsx_xor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (xor:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" + if (operands[5] == constm1_rtx) + emit_move_insn (operands[3], constm1_rtx); + + else if (operands[5] == const0_rtx) + { + if (!rtx_equal_p (operands[3], operands[4])) + emit_move_insn (operands[3], operands[4]); + } + else + emit_insn (gen_iordi3 (operands[3], operands[4], operands[5])); + + if (operands[8] == constm1_rtx) + emit_move_insn (operands[8], constm1_rtx); + + else if (operands[8] == const0_rtx) + { + if (!rtx_equal_p (operands[6], operands[7])) + emit_move_insn (operands[6], operands[7]); + } + else + emit_insn (gen_iordi3 (operands[6], operands[7], operands[8])); + DONE; +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_xor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64" "xxlxor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_one_cmpl<mode>2" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn_and_split "*vsx_xor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r") + (xor:VSX_L + (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r") + (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlxor %x0,%x1,%x2 + # + # + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5))) + (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two,three,three") + (set_attr "length" "4,8,8,8,16,16")]) + +(define_insn "*vsx_one_cmpl<mode>2_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 
(match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x1" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r") + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x1 + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 2) (not:DI (match_dup 3))) + (set (match_dup 4) (not:DI (match_dup 5)))] +{ + operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two") + (set_attr "length" "4,8,8")]) -(define_insn "*vsx_nor<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") - (not:VSX_L - (ior:VSX_L - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa") - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" +(define_insn "*vsx_nor<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L 1 "vlogical_operand" "%wa")) + (not:VSX_L2 (match_operand:VSX_L 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" "xxlnor %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_nor<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r") + (and:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnor %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +(define_insn "*vsx_andc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (and:VSX_L2 + (not:VSX_L2 + (match_operand:VSX_L2 2 "vlogical_operand" "wa")) + (match_operand:VSX_L2 1 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) -(define_insn "*vsx_andc<mode>3" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") +(define_insn_and_split "*vsx_andc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") 
(and:VSX_L (not:VSX_L - (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")) - (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))] - "VECTOR_MEM_VSX_P (<MODE>mode) - && (<MODE>mode != TImode || TARGET_POWERPC64)" - "xxlandc %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) + (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlandc %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Power8 vector logical instructions. +(define_insn "*vsx_eqv<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (not:VSX_L2 + (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa") + (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxleqv %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_eqv<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") + (not:VSX_L + (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r") + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxleqv %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5)))) + (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite nand into canonical form +(define_insn "*vsx_nand<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlnand %x0,%x1,%x2" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_nand<mode>3_64bit" + [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r")) + (not:VSX_L (match_operand:VSX_L 2 "register_operand" 
"wa,r,0,r"))))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlnand %x0,%x1,%x2 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5)))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) + +;; Rewrite or complement into canonical form, by reversing the arguments +(define_insn "*vsx_orc<mode>3_32bit" + [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa") + (ior:VSX_L2 + (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")) + (match_operand:VSX_L2 2 "vlogical_operand" "wa")))] + "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "xxlorc %x0,%x2,%x1" + [(set_attr "type" "vecsimple") + (set_attr "length" "4")]) + +(define_insn_and_split "*vsx_orc<mode>3_64bit" + [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r") + (ior:VSX_L + (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")) + (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))] + "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)" + "@ + xxlorc %x0,%x2,%x1 + # + # + #" + "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR + && VECTOR_MEM_VSX_P (<MODE>mode) + && int_reg_operand (operands[0], <MODE>mode)" + [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5))) + (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))] +{ + operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0); + operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0); + operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0); + operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8); + operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8); + operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8); +} + [(set_attr "type" "vecsimple,two,two,two") + (set_attr "length" "4,8,8,8")]) ;; Permute operations diff --git a/gcc/config/rx/rx-opts.h b/gcc/config/rx/rx-opts.h index f00de76a901..4d5455e8d8d 100644 --- a/gcc/config/rx/rx-opts.h +++ b/gcc/config/rx/rx-opts.h @@ -24,7 +24,8 @@ enum rx_cpu_types { RX600, RX610, - RX200 + RX200, + RX100 }; #endif diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index 15d5359ea1d..d781bb73d28 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -975,6 +975,8 @@ rx_gen_move_template (rtx * operands, bool is_movu) loading an immediate into a register. 
*/ extension = ".W"; break; + case DFmode: + case DImode: case SFmode: case SImode: extension = ".L"; @@ -988,19 +990,44 @@ rx_gen_move_template (rtx * operands, bool is_movu) } if (MEM_P (src) && rx_pid_data_operand (XEXP (src, 0)) == PID_UNENCODED) - src_template = "(%A1-__pid_base)[%P1]"; + { + gcc_assert (GET_MODE (src) != DImode); + gcc_assert (GET_MODE (src) != DFmode); + + src_template = "(%A1 - __pid_base)[%P1]"; + } else if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0))) - src_template = "%%gp(%A1)[%G1]"; + { + gcc_assert (GET_MODE (src) != DImode); + gcc_assert (GET_MODE (src) != DFmode); + + src_template = "%%gp(%A1)[%G1]"; + } else src_template = "%1"; if (MEM_P (dest) && rx_small_data_operand (XEXP (dest, 0))) - dst_template = "%%gp(%A0)[%G0]"; + { + gcc_assert (GET_MODE (dest) != DImode); + gcc_assert (GET_MODE (dest) != DFmode); + + dst_template = "%%gp(%A0)[%G0]"; + } else dst_template = "%0"; - sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov", - extension, src_template, dst_template); + if (GET_MODE (dest) == DImode || GET_MODE (dest) == DFmode) + { + gcc_assert (! is_movu); + + if (REG_P (src) && REG_P (dest) && (REGNO (dest) == REGNO (src) + 1)) + sprintf (out_template, "mov.L\t%H1, %H0 | mov.L\t%1, %0"); + else + sprintf (out_template, "mov.L\t%1, %0 | mov.L\t%H1, %H0"); + } + else + sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov", + extension, src_template, dst_template); return out_template; } @@ -3240,6 +3267,12 @@ rx_ok_to_inline (tree caller, tree callee) || lookup_attribute ("gnu_inline", DECL_ATTRIBUTES (callee)) != NULL_TREE; } +static bool +rx_enable_lra (void) +{ + return TARGET_ENABLE_LRA || 1; +} + #undef TARGET_NARROW_VOLATILE_BITFIELD #define TARGET_NARROW_VOLATILE_BITFIELD rx_narrow_volatile_bitfield @@ -3391,6 +3424,9 @@ rx_ok_to_inline (tree caller, tree callee) #undef TARGET_WARN_FUNC_RETURN #define TARGET_WARN_FUNC_RETURN rx_warn_func_return +#undef TARGET_LRA_P +#define TARGET_LRA_P rx_enable_lra + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rx.h" diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 092fd7659a2..72aee2fe214 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -29,9 +29,22 @@ builtin_define ("__RX610__"); \ builtin_assert ("machine=RX610"); \ } \ - else \ - builtin_assert ("machine=RX600"); \ - \ + else if (rx_cpu_type == RX100) \ + { \ + builtin_define ("__RX100__"); \ + builtin_assert ("machine=RX100"); \ + } \ + else if (rx_cpu_type == RX200) \ + { \ + builtin_define ("__RX200__"); \ + builtin_assert ("machine=RX200"); \ + } \ + else if (rx_cpu_type == RX600) \ + { \ + builtin_define ("__RX600__"); \ + builtin_assert ("machine=RX600"); \ + } \ + \ if (TARGET_BIG_ENDIAN_DATA) \ builtin_define ("__RX_BIG_ENDIAN__"); \ else \ @@ -60,6 +73,7 @@ #undef CC1_SPEC #define CC1_SPEC "\ %{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}} \ + %{mcpu=rx100:%{fpu:%erx100 cpu does not have FPU hardware}} \ %{mcpu=rx200:%{fpu:%erx200 cpu does not have FPU hardware}}" #undef STARTFILE_SPEC diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 3a95567a43f..692b7d220a3 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -30,7 +30,7 @@ ;; then all operations on doubles have to be handled by ;; library functions. 
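As a usage sketch for the 64-bit support this RX change introduces (the DImode/DFmode handling added to rx_gen_move_template above and the movdi/movdf patterns added at the end of this rx.md hunk), the C below is illustrative only; the expectation of a mov.L pair comes from reading the template code, not from verified compiler output.

/* Plain 64-bit copies; rx_gen_move_template is expected to emit each as
   two mov.L instructions, ordering the pair to avoid clobbering an
   overlapping source register.  */
void
copy_ll (long long *dst, const long long *src)
{
  *dst = *src;
}

void
copy_d (double *dst, const double *src)
{
  *dst = *src;
}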
(define_mode_iterator register_modes - [(SF "ALLOW_RX_FPU_INSNS") (SI "") (HI "") (QI "")]) + [(SF "") (SI "") (HI "") (QI "")]) (define_constants [ @@ -2621,3 +2621,21 @@ "" "" ) + +(define_insn "movdi" + [(set:DI (match_operand:DI 0 "nonimmediate_operand" "=rm") + (match_operand:DI 1 "general_operand" "rmi"))] + "TARGET_ENABLE_LRA || 1" + { return rx_gen_move_template (operands, false); } + [(set_attr "length" "16") + (set_attr "timings" "22")] +) + +(define_insn "movdf" + [(set:DF (match_operand:DF 0 "nonimmediate_operand" "=rm") + (match_operand:DF 1 "general_operand" "rmi"))] + "TARGET_ENABLE_LRA || 1" + { return rx_gen_move_template (operands, false); } + [(set_attr "length" "16") + (set_attr "timings" "22")] +) diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt index 09d93c3e5f1..12312cfef6b 100644 --- a/gcc/config/rx/rx.opt +++ b/gcc/config/rx/rx.opt @@ -61,6 +61,9 @@ Enum(rx_cpu_types) String(rx200) Value(RX200) EnumValue Enum(rx_cpu_types) String(rx600) Value(RX600) +EnumValue +Enum(rx_cpu_types) String(rx100) Value(RX100) + ;--------------------------------------------------- mbig-endian-data @@ -132,3 +135,7 @@ Enable the use of the old, broken, ABI where all stacked function arguments are mrx-abi Target RejectNegative Report InverseMask(GCC_ABI) Enable the use the standard RX ABI where all stacked function arguments are naturally aligned. This is the default. + +mlra +Target Report Mask(ENABLE_LRA) +Enable the use of the LRA register allocator. diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx index 97079859240..41a3d3a98dc 100644 --- a/gcc/config/rx/t-rx +++ b/gcc/config/rx/t-rx @@ -28,7 +28,7 @@ MULTILIB_DIRNAMES = 64-bit-double no-fpu-libs big-endian-data pid # MULTILIB_OPTIONS += mgcc-abi # MULTILIB_DIRNAMES += gcc-abi -MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 +MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 nofpu=mcpu?rx100 MULTILIB_EXCEPTIONS = MULTILIB_EXTRA_OPTS = diff --git a/gcc/config/s390/htmintrin.h b/gcc/config/s390/htmintrin.h new file mode 100644 index 00000000000..7aaa9f5bf7c --- /dev/null +++ b/gcc/config/s390/htmintrin.h @@ -0,0 +1,57 @@ +/* GNU compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _HTMINTRIN_H +#define _HTMINTRIN_H + + +/* Condition codes generated by tbegin */ +#define _HTM_TBEGIN_STARTED 0 +#define _HTM_TBEGIN_INDETERMINATE 1 +#define _HTM_TBEGIN_TRANSIENT 2 +#define _HTM_TBEGIN_PERSISTENT 3 + +/* The abort codes below this threshold are reserved for machine + use. */ +#define _HTM_FIRST_USER_ABORT_CODE 256 + +/* The transaction diagnostic block is it is defined in the Principles + of Operation chapter 5-91. 
*/ + +struct __htm_tdb { + unsigned char format; /* 0 */ + unsigned char flags; + unsigned char reserved1[4]; + unsigned short nesting_depth; + unsigned long long abort_code; /* 8 */ + unsigned long long conflict_token; /* 16 */ + unsigned long long atia; /* 24 */ + unsigned char eaid; /* 32 */ + unsigned char dxc; + unsigned char reserved2[2]; + unsigned int program_int_id; + unsigned long long exception_id; /* 40 */ + unsigned long long bea; /* 48 */ + unsigned char reserved3[72]; /* 56 */ + unsigned long long gprs[16]; /* 128 */ +} __attribute__((__packed__, __aligned__ (8))); + + +#endif /* _HTMINTRIN_H */ diff --git a/gcc/config/s390/htmxlintrin.h b/gcc/config/s390/htmxlintrin.h new file mode 100644 index 00000000000..bb142195b2b --- /dev/null +++ b/gcc/config/s390/htmxlintrin.h @@ -0,0 +1,182 @@ +/* XL compiler hardware transactional execution intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _HTMXLINTRIN_H +#define _HTMXLINTRIN_H + +#include <stdint.h> + +#include <htmintrin.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* These intrinsics are being made available for compatibility with + the IBM XL compiler. For documentation please see the "z/OS XL + C/C++ Programming Guide" publically available on the web. 
*/ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_simple_begin () +{ + return __builtin_tbegin_nofloat (0); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_begin (void* const tdb) +{ + return __builtin_tbegin_nofloat (tdb); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_end () +{ + return __builtin_tend (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_abort () +{ + return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_named_abort (unsigned char const code) +{ + return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_non_transactional_store (void* const addr, long long const value) +{ + __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_nesting_depth (void* const tdb_ptr) +{ + int depth = __builtin_tx_nesting_depth (); + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (depth != 0) + return depth; + + if (tdb->format == 0) + return 0; + return tdb->nesting_depth; +} + +/* Transaction failure diagnostics */ + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_user_abort (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + if (tdb->format == 0) + return 0; + + if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE) + { + *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE; + return 1; + } + return 0; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_illegal (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 4 /* unfiltered program interruption */ + || tdb->abort_code == 11 /* restricted instruction */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_footprint_exceeded (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 7 /* fetch overflow */ + || tdb->abort_code == 8 /* store overflow */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_nested_too_deep (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */ +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_conflict (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return (tdb->format == 0 + && (tdb->abort_code == 9 /* fetch conflict */ + || tdb->abort_code == 10 /* store conflict */)); +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_is_failure_persistent (long const result) 
+{ + return result == _HTM_TBEGIN_PERSISTENT; +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_address (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; +#ifdef __s390x__ + return tdb->atia; +#else + return tdb->atia & 0xffffffff; +#endif +} + +extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__TM_failure_code (void* const tdb_ptr) +{ + struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr; + + return tdb->abort_code; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _HTMXLINTRIN_H */ diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 523326e177d..069b42489a7 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -176,7 +176,11 @@ { if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; return (s390_branch_condition_mask (op) >= 0); @@ -224,7 +228,11 @@ if (GET_CODE (XEXP (op, 0)) != REG || REGNO (XEXP (op, 0)) != CC_REGNUM - || XEXP (op, 1) != const0_rtx) + || (XEXP (op, 1) != const0_rtx + && !(CONST_INT_P (XEXP (op, 1)) + && GET_MODE (XEXP (op, 0)) == CCRAWmode + && INTVAL (XEXP (op, 1)) >= 0 + && INTVAL (XEXP (op, 1)) <= 15))) return false; switch (GET_MODE (XEXP (op, 0))) diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def index 419108fb473..5e0b50cafa1 100644 --- a/gcc/config/s390/s390-modes.def +++ b/gcc/config/s390/s390-modes.def @@ -152,6 +152,14 @@ The compare and swap instructions sets the condition code to 0/1 if the operands were equal/unequal. The CCZ1 mode ensures the result can be effectively placed into a register. +CCRAW + +The cc mode generated by a non-compare instruction. The condition +code mask for the CC consumer is determined by the comparison operator +(only EQ and NE allowed) and the immediate value given as second +operand to the operator. For the other CC modes this value used to be +0. 
+ */ @@ -172,3 +180,4 @@ CC_MODE (CCT); CC_MODE (CCT1); CC_MODE (CCT2); CC_MODE (CCT3); +CC_MODE (CCRAW); diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 1a8205359e4..67283df4553 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -58,7 +58,7 @@ extern bool s390_match_ccmode (rtx, enum machine_mode); extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool); extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx); extern rtx s390_emit_compare (enum rtx_code, rtx, rtx); -extern void s390_emit_jump (rtx, rtx); +extern rtx s390_emit_jump (rtx, rtx); extern bool symbolic_reference_mentioned_p (rtx); extern bool tls_symbolic_reference_mentioned_p (rtx); extern bool legitimate_la_operand_p (rtx); @@ -87,6 +87,7 @@ extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic (enum machine_mode, enum rtx_code, rtx, rtx, rtx, bool); +extern void s390_expand_tbegin (rtx, rtx, rtx, bool); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); extern rtx s390_emit_call (rtx, rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 30c34901f8d..2cacf6f52ad 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -367,6 +367,10 @@ struct GTY(()) machine_function const char *some_ld_name; bool has_landing_pad_p; + + /* True if the current function may contain a tbegin clobbering + FPRs. */ + bool tbegin_p; }; /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ @@ -824,9 +828,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, *op1 = constm1_rtx; } - /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCU_TO_INT + && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT && XVECLEN (*op0, 0) == 1 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG @@ -852,25 +856,35 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } } - /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */ + /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */ if (GET_CODE (*op0) == UNSPEC - && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT + && XINT (*op0, 1) == UNSPEC_CC_TO_INT && XVECLEN (*op0, 0) == 1 - && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode && GET_CODE (XVECEXP (*op0, 0, 0)) == REG && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM - && *op1 == const0_rtx) + && CONST_INT_P (*op1)) { enum rtx_code new_code = UNKNOWN; - switch (*code) + switch (GET_MODE (XVECEXP (*op0, 0, 0))) { - case EQ: new_code = EQ; break; - case NE: new_code = NE; break; - default: break; + case CCZmode: + case CCRAWmode: + switch (*code) + { + case EQ: new_code = EQ; break; + case NE: new_code = NE; break; + default: break; + } + break; + default: break; } if (new_code != UNKNOWN) { + /* For CCRAWmode put the required cc mask into the second + operand. */ + if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode) + *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1))); *op0 = XVECEXP (*op0, 0, 0); *code = new_code; } @@ -942,10 +956,11 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, const0_rtx); } -/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an - unconditional jump, else a conditional jump under condition COND. */ +/* Emit a jump instruction to TARGET and return it. 
If COND is + NULL_RTX, emit an unconditional jump, else a conditional jump under + condition COND. */ -void +rtx s390_emit_jump (rtx target, rtx cond) { rtx insn; @@ -955,7 +970,7 @@ s390_emit_jump (rtx target, rtx cond) target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx); insn = gen_rtx_SET (VOIDmode, pc_rtx, target); - emit_jump_insn (insn); + return emit_jump_insn (insn); } /* Return branch condition mask to implement a branch @@ -971,7 +986,10 @@ s390_branch_condition_mask (rtx code) gcc_assert (GET_CODE (XEXP (code, 0)) == REG); gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM); - gcc_assert (XEXP (code, 1) == const0_rtx); + gcc_assert (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1)))); + switch (GET_MODE (XEXP (code, 0))) { @@ -1145,6 +1163,17 @@ s390_branch_condition_mask (rtx code) } break; + case CCRAWmode: + switch (GET_CODE (code)) + { + case EQ: + return INTVAL (XEXP (code, 1)); + case NE: + return (INTVAL (XEXP (code, 1))) ^ 0xf; + default: + gcc_unreachable (); + } + default: return -1; } @@ -1204,7 +1233,9 @@ s390_branch_condition_mnemonic (rtx code, int inv) if (GET_CODE (XEXP (code, 0)) == REG && REGNO (XEXP (code, 0)) == CC_REGNUM - && XEXP (code, 1) == const0_rtx) + && (XEXP (code, 1) == const0_rtx + || (GET_MODE (XEXP (code, 0)) == CCRAWmode + && CONST_INT_P (XEXP (code, 1))))) mask = s390_branch_condition_mask (code); else mask = s390_compare_and_branch_condition_mask (code); @@ -1602,6 +1633,11 @@ s390_option_override (void) if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP) target_flags |= MASK_HARD_DFP; + /* Enable hardware transactions if available and not explicitly + disabled by user. E.g. with -m31 -march=zEC12 -mzarch */ + if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) + target_flags |= MASK_OPT_HTM; + if (TARGET_HARD_DFP && !TARGET_DFP) { if (target_flags_explicit & MASK_HARD_DFP) @@ -2017,14 +2053,18 @@ s390_decompose_address (rtx addr, struct s390_address *out) Thus we don't check the displacement for validity here. If after elimination the displacement turns out to be invalid after all, this is fixed up by reload in any case. */ - if (base != arg_pointer_rtx - && indx != arg_pointer_rtx - && base != return_address_pointer_rtx - && indx != return_address_pointer_rtx - && base != frame_pointer_rtx - && indx != frame_pointer_rtx - && base != virtual_stack_vars_rtx - && indx != virtual_stack_vars_rtx) + /* LRA maintains always displacements up to date and we need to + know the displacement is right during all LRA not only at the + final elimination. */ + if (lra_in_progress + || (base != arg_pointer_rtx + && indx != arg_pointer_rtx + && base != return_address_pointer_rtx + && indx != return_address_pointer_rtx + && base != frame_pointer_rtx + && indx != frame_pointer_rtx + && base != virtual_stack_vars_rtx + && indx != virtual_stack_vars_rtx)) if (!DISP_IN_RANGE (offset)) return false; } @@ -3189,7 +3229,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, /* We need a scratch register when loading a PLUS expression which is not a legitimate operand of the LOAD ADDRESS instruction. */ - if (in_p && s390_plus_operand (x, mode)) + /* LRA can deal with transformation of plus op very well -- so we + don't need to prompt LRA in this case. */ + if (! lra_in_progress && in_p && s390_plus_operand (x, mode)) sri->icode = (TARGET_64BIT ? 
CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus); @@ -7017,7 +7059,7 @@ s390_chunkify_start (void) if (LABEL_P (insn) && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn))) { - rtx vec_insn = next_real_insn (insn); + rtx vec_insn = NEXT_INSN (insn); if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn)) bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn)); } @@ -7027,6 +7069,8 @@ s390_chunkify_start (void) else if (JUMP_P (insn)) { rtx pat = PATTERN (insn); + rtx table; + if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2) pat = XVECEXP (pat, 0, 0); @@ -7040,28 +7084,18 @@ s390_chunkify_start (void) bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); } } - else if (GET_CODE (pat) == PARALLEL - && XVECLEN (pat, 0) == 2 - && GET_CODE (XVECEXP (pat, 0, 0)) == SET - && GET_CODE (XVECEXP (pat, 0, 1)) == USE - && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF) - { - /* Find the jump table used by this casesi jump. */ - rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0); - rtx vec_insn = next_real_insn (vec_label); - if (vec_insn && JUMP_TABLE_DATA_P (vec_insn)) - { - rtx vec_pat = PATTERN (vec_insn); - int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC; - - for (i = 0; i < XVECLEN (vec_pat, diff_p); i++) - { - rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0); - - if (s390_find_pool (pool_list, label) - != s390_find_pool (pool_list, insn)) - bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); - } + else if (tablejump_p (insn, NULL, &table)) + { + rtx vec_pat = PATTERN (table); + int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC; + + for (i = 0; i < XVECLEN (vec_pat, diff_p); i++) + { + rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0); + + if (s390_find_pool (pool_list, label) + != s390_find_pool (pool_list, insn)) + bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label)); } } } @@ -7336,11 +7370,11 @@ s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *d if (GET_CODE (setreg) == SUBREG) { rtx inner = SUBREG_REG (setreg); - if (!GENERAL_REG_P (inner)) + if (!GENERAL_REG_P (inner) && !FP_REG_P (inner)) return; regno = subreg_regno (setreg); } - else if (GENERAL_REG_P (setreg)) + else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg)) regno = REGNO (setreg); else return; @@ -7363,13 +7397,13 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) rtx cur_insn; unsigned int i; - memset (regs_ever_clobbered, 0, 16 * sizeof (int)); + memset (regs_ever_clobbered, 0, 32 * sizeof (int)); /* For non-leaf functions we have to consider all call clobbered regs to be clobbered. */ if (!crtl->is_leaf) { - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) regs_ever_clobbered[i] = call_really_used_regs[i]; } @@ -7391,7 +7425,7 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered) See expand_builtin_unwind_init. For regs_ever_live this is done by reload. */ if (cfun->has_nonlocal_label) - for (i = 0; i < 16; i++) + for (i = 0; i < 32; i++) if (!call_really_used_regs[i]) regs_ever_clobbered[i] = 1; @@ -7457,17 +7491,6 @@ s390_register_info (int clobbered_regs[]) { int i, j; - /* fprs 8 - 15 are call saved for 64 Bit ABI. */ - cfun_frame_layout.fpr_bitmap = 0; - cfun_frame_layout.high_fprs = 0; - if (TARGET_64BIT) - for (i = 24; i < 32; i++) - if (df_regs_ever_live_p (i) && !global_regs[i]) - { - cfun_set_fpr_bit (i - 16); - cfun_frame_layout.high_fprs++; - } - /* Find first and last gpr to be saved. We trust regs_ever_live data, except that we don't save and restore global registers. 
@@ -7476,6 +7499,29 @@ s390_register_info (int clobbered_regs[]) s390_regs_ever_clobbered (clobbered_regs); + /* fprs 8 - 15 are call saved for 64 Bit ABI. */ + if (!epilogue_completed) + { + cfun_frame_layout.fpr_bitmap = 0; + cfun_frame_layout.high_fprs = 0; + if (TARGET_64BIT) + for (i = 24; i < 32; i++) + /* During reload we have to use the df_regs_ever_live infos + since reload is marking FPRs used as spill slots there as + live before actually making the code changes. Without + this we fail during elimination offset verification. */ + if ((clobbered_regs[i] + || (df_regs_ever_live_p (i) + && (lra_in_progress + || reload_in_progress + || crtl->saves_all_registers))) + && !global_regs[i]) + { + cfun_set_fpr_bit (i - 16); + cfun_frame_layout.high_fprs++; + } + } + for (i = 0; i < 16; i++) clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i]; @@ -7726,7 +7772,7 @@ s390_init_frame_layout (void) { HOST_WIDE_INT frame_size; int base_used; - int clobbered_regs[16]; + int clobbered_regs[32]; /* On S/390 machines, we may need to perform branch splitting, which will require both base and return address register. We have no @@ -7761,6 +7807,157 @@ s390_init_frame_layout (void) while (frame_size != cfun_frame_layout.frame_size); } +/* Remove the FPR clobbers from a tbegin insn if it can be proven that + the TX is nonescaping. A transaction is considered escaping if + there is at least one path from tbegin returning CC0 to the + function exit block without an tend. + + The check so far has some limitations: + - only single tbegin/tend BBs are supported + - the first cond jump after tbegin must separate the CC0 path from ~CC0 + - when CC is copied to a GPR and the CC0 check is done with the GPR + this is not supported +*/ + +static void +s390_optimize_nonescaping_tx (void) +{ + const unsigned int CC0 = 1 << 3; + basic_block tbegin_bb = NULL; + basic_block tend_bb = NULL; + basic_block bb; + rtx insn; + bool result = true; + int bb_index; + rtx tbegin_insn = NULL_RTX; + + if (!cfun->machine->tbegin_p) + return; + + for (bb_index = 0; bb_index < n_basic_blocks; bb_index++) + { + bb = BASIC_BLOCK (bb_index); + + FOR_BB_INSNS (bb, insn) + { + rtx ite, cc, pat, target; + unsigned HOST_WIDE_INT mask; + + if (!INSN_P (insn) || INSN_CODE (insn) <= 0) + continue; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) != SET + || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE) + continue; + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN) + { + rtx tmp; + + tbegin_insn = insn; + + /* Just return if the tbegin doesn't have clobbers. */ + if (GET_CODE (PATTERN (insn)) != PARALLEL) + return; + + if (tbegin_bb != NULL) + return; + + /* Find the next conditional jump. 
*/ + for (tmp = NEXT_INSN (insn); + tmp != NULL_RTX; + tmp = NEXT_INSN (tmp)) + { + if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp)) + return; + if (!JUMP_P (tmp)) + continue; + + ite = SET_SRC (PATTERN (tmp)); + if (GET_CODE (ite) != IF_THEN_ELSE) + continue; + + cc = XEXP (XEXP (ite, 0), 0); + if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)) + || GET_MODE (cc) != CCRAWmode + || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT) + return; + + if (bb->succs->length () != 2) + return; + + mask = INTVAL (XEXP (XEXP (ite, 0), 1)); + if (GET_CODE (XEXP (ite, 0)) == NE) + mask ^= 0xf; + + if (mask == CC0) + target = XEXP (ite, 1); + else if (mask == (CC0 ^ 0xf)) + target = XEXP (ite, 2); + else + return; + + { + edge_iterator ei; + edge e1, e2; + + ei = ei_start (bb->succs); + e1 = ei_safe_edge (ei); + ei_next (&ei); + e2 = ei_safe_edge (ei); + + if (e2->flags & EDGE_FALLTHRU) + { + e2 = e1; + e1 = ei_safe_edge (ei); + } + + if (!(e1->flags & EDGE_FALLTHRU)) + return; + + tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest; + } + if (tmp == BB_END (bb)) + break; + } + } + + if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND) + { + if (tend_bb != NULL) + return; + tend_bb = bb; + } + } + } + + /* Either we successfully remove the FPR clobbers here or we are not + able to do anything for this TX. Both cases don't qualify for + another look. */ + cfun->machine->tbegin_p = false; + + if (tbegin_bb == NULL || tend_bb == NULL) + return; + + calculate_dominance_info (CDI_POST_DOMINATORS); + result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb); + free_dominance_info (CDI_POST_DOMINATORS); + + if (!result) + return; + + PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0); + INSN_CODE (tbegin_insn) = -1; + df_insn_rescan (tbegin_insn); + + return; +} + /* Update frame layout. Recompute actual register save data based on current info and update regs_ever_live for the special registers. May be called multiple times, but may never cause *more* registers @@ -7769,7 +7966,7 @@ s390_init_frame_layout (void) static void s390_update_frame_layout (void) { - int clobbered_regs[16]; + int clobbered_regs[32]; s390_register_info (clobbered_regs); @@ -7868,6 +8065,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode) return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; } +/* Return true if we use LRA instead of reload pass. */ +static bool +s390_lra_p (void) +{ + return s390_lra_flag; +} + /* Return true if register FROM can be eliminated via register TO. */ static bool @@ -8199,8 +8403,10 @@ s390_emit_prologue (void) int offset; int next_fpr = 0; - /* Complete frame layout. */ + /* Try to get rid of the FPR clobbers. */ + s390_optimize_nonescaping_tx (); + /* Complete frame layout. */ s390_update_frame_layout (); /* Annotate all constant pool references to let the scheduler know @@ -9348,6 +9554,294 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, return build_va_arg_indirect_ref (addr); } +/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX) + expanders. + DEST - Register location where CC will be stored. + TDB - Pointer to a 256 byte area where to store the transaction. + diagnostic block. NULL if TDB is not needed. + RETRY - Retry count value. If non-NULL a retry loop for CC2 + is emitted + CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part + of the tbegin instruction pattern. 
*/ + +void +s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) +{ + const int CC0 = 1 << 3; + const int CC1 = 1 << 2; + const int CC3 = 1 << 0; + rtx abort_label = gen_label_rtx (); + rtx leave_label = gen_label_rtx (); + rtx retry_reg = gen_reg_rtx (SImode); + rtx retry_label = NULL_RTX; + rtx jump; + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + + if (retry != NULL_RTX) + { + emit_move_insn (retry_reg, retry); + retry_label = gen_label_rtx (); + emit_label (retry_label); + } + + if (clobber_fprs_p) + emit_insn (gen_tbegin_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + else + emit_insn (gen_tbegin_nofloat_1 (tdb, + gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK))); + + jump = s390_emit_jump (abort_label, + gen_rtx_NE (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC0))); + + JUMP_LABEL (jump) = abort_label; + LABEL_NUSES (abort_label) = 1; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* Initialize CC return value. */ + emit_move_insn (dest, const0_rtx); + + s390_emit_jump (leave_label, NULL_RTX); + LABEL_NUSES (leave_label) = 1; + emit_barrier (); + + /* Abort handler code. */ + + emit_label (abort_label); + if (retry != NULL_RTX) + { + rtx count = gen_reg_rtx (SImode); + jump = s390_emit_jump (leave_label, + gen_rtx_EQ (VOIDmode, + gen_rtx_REG (CCRAWmode, CC_REGNUM), + gen_rtx_CONST_INT (VOIDmode, CC1 | CC3))); + LABEL_NUSES (leave_label) = 2; + add_reg_note (jump, REG_BR_PROB, very_unlikely); + + /* CC2 - transient failure. Perform retry with ppa. */ + emit_move_insn (count, retry); + emit_insn (gen_subsi3 (count, count, retry_reg)); + emit_insn (gen_tx_assist (count)); + jump = emit_jump_insn (gen_doloop_si64 (retry_label, + retry_reg, + retry_reg)); + JUMP_LABEL (jump) = retry_label; + LABEL_NUSES (retry_label) = 1; + } + + emit_move_insn (dest, gen_rtx_UNSPEC (SImode, + gen_rtvec (1, gen_rtx_REG (CCRAWmode, + CC_REGNUM)), + UNSPEC_CC_TO_INT)); + emit_label (leave_label); +} + +/* Builtins. 
*/ + +enum s390_builtin +{ + S390_BUILTIN_TBEGIN, + S390_BUILTIN_TBEGIN_NOFLOAT, + S390_BUILTIN_TBEGIN_RETRY, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + S390_BUILTIN_TBEGINC, + S390_BUILTIN_TEND, + S390_BUILTIN_TABORT, + S390_BUILTIN_NON_TX_STORE, + S390_BUILTIN_TX_NESTING_DEPTH, + S390_BUILTIN_TX_ASSIST, + + S390_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[S390_BUILTIN_max] = { + CODE_FOR_tbegin, + CODE_FOR_tbegin_nofloat, + CODE_FOR_tbegin_retry, + CODE_FOR_tbegin_retry_nofloat, + CODE_FOR_tbeginc, + CODE_FOR_tend, + CODE_FOR_tabort, + CODE_FOR_ntstg, + CODE_FOR_etnd, + CODE_FOR_tx_assist +}; + +static void +s390_init_builtins (void) +{ + tree ftype, uint64_type; + + /* void foo (void) */ + ftype = build_function_type_list (void_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC, + BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (int) */ + ftype = build_function_type_list (void_type_node, integer_type_node, + NULL_TREE); + add_builtin_function ("__builtin_tabort", ftype, + S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tx_assist", ftype, + S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_nofloat", ftype, + S390_BUILTIN_TBEGIN_NOFLOAT, + BUILT_IN_MD, NULL, NULL_TREE); + + /* int foo (void *, int) */ + ftype = build_function_type_list (integer_type_node, ptr_type_node, + integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry", ftype, + S390_BUILTIN_TBEGIN_RETRY, + BUILT_IN_MD, + NULL, NULL_TREE); + add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype, + S390_BUILTIN_TBEGIN_RETRY_NOFLOAT, + BUILT_IN_MD, + NULL, NULL_TREE); + + /* int foo (void) */ + ftype = build_function_type_list (integer_type_node, NULL_TREE); + add_builtin_function ("__builtin_tx_nesting_depth", ftype, + S390_BUILTIN_TX_NESTING_DEPTH, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_tend", ftype, + S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE); + + /* void foo (uint64_t *, uint64_t) */ + if (TARGET_64BIT) + uint64_type = long_unsigned_type_node; + else + uint64_type = long_long_unsigned_type_node; + + ftype = build_function_type_list (void_type_node, + build_pointer_type (uint64_type), + uint64_type, NULL_TREE); + add_builtin_function ("__builtin_non_tx_store", ftype, + S390_BUILTIN_NON_TX_STORE, + BUILT_IN_MD, NULL, NULL_TREE); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + tree arg; + call_expr_arg_iterator iter; + + if (fcode >= S390_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + if (!TARGET_ZEC12) + error ("Transactional execution builtins require zEC12 or later\n"); + + if (!TARGET_HTM && TARGET_ZEC12) + error ("Transactional execution builtins not enabled (-mtx)\n"); + + /* Set a flag in the machine specific cfun part in order to support + saving/restoring of FPRs. */ + if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY) + cfun->machine->tbegin_p = true; + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity >= MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + { + if (insn_op->predicate == memory_operand) + { + /* Don't move a NULL pointer into a register. Otherwise + we have to rely on combine being able to move it back + in order to get an immediate 0 in the instruction. */ + if (op[arity] != const0_rtx) + op[arity] = copy_to_mode_reg (Pmode, op[arity]); + op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]); + } + else + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + } + + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0], op[1]); + else + pat = GEN_FCN (icode) (op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + /* Output assembly code for the trampoline template to stdio stream FILE. 
@@ -11003,6 +11497,11 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY s390_return_in_memory +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS s390_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN s390_expand_builtin + #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra @@ -11105,6 +11604,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p +#undef TARGET_LRA_P +#define TARGET_LRA_P s390_lra_p + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE s390_can_eliminate diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 43e24d5d112..d53fed7a6f2 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -34,7 +34,8 @@ enum processor_flags PF_DFP = 16, PF_Z10 = 32, PF_Z196 = 64, - PF_ZEC12 = 128 + PF_ZEC12 = 128, + PF_TX = 256 }; /* This is necessary to avoid a warning about comparing different enum @@ -61,6 +62,8 @@ enum processor_flags (s390_arch_flags & PF_Z196) #define TARGET_CPU_ZEC12 \ (s390_arch_flags & PF_ZEC12) +#define TARGET_CPU_HTM \ + (s390_arch_flags & PF_TX) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility when run in @@ -78,6 +81,8 @@ enum processor_flags (TARGET_ZARCH && TARGET_CPU_Z196) #define TARGET_ZEC12 \ (TARGET_ZARCH && TARGET_CPU_ZEC12) +#define TARGET_HTM \ + (TARGET_ZARCH && TARGET_CPU_HTM && TARGET_OPT_HTM) #define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196) @@ -93,23 +98,25 @@ enum processor_flags #define TARGET_TPF 0 /* Target CPU builtins. */ -#define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ - builtin_assert ("cpu=s390"); \ - builtin_assert ("machine=s390"); \ - builtin_define ("__s390__"); \ - if (TARGET_ZARCH) \ - builtin_define ("__zarch__"); \ - if (TARGET_64BIT) \ - builtin_define ("__s390x__"); \ - if (TARGET_LONG_DOUBLE_128) \ - builtin_define ("__LONG_DOUBLE_128__"); \ - } \ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("cpu=s390"); \ + builtin_assert ("machine=s390"); \ + builtin_define ("__s390__"); \ + if (TARGET_ZARCH) \ + builtin_define ("__zarch__"); \ + if (TARGET_64BIT) \ + builtin_define ("__s390x__"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + if (TARGET_HTM) \ + builtin_define ("__HTM__"); \ + } \ while (0) #ifdef DEFAULT_TARGET_64BIT -#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP) +#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM) #else #define TARGET_DEFAULT 0 #endif @@ -221,7 +228,7 @@ enum processor_flags /* Alignment on even addresses for LARL instruction. */ #define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) -#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN) /* Alignment is not required by the hardware. 
*/ #define STRICT_ALIGNMENT 0 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index cad4f5f579a..e12d1538a50 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -59,11 +59,17 @@ (define_c_enum "unspec" [ ; Miscellaneous UNSPEC_ROUND - UNSPEC_CCU_TO_INT - UNSPEC_CCZ_TO_INT UNSPEC_ICM UNSPEC_TIE + ; Convert CC into a str comparison result and copy it into an + ; integer register + ; cc0->0, cc1->1, cc2->-1, (cc3->-1) + UNSPEC_STRCMPCC_TO_INT + + ; Copy CC as is into the lower 2 bits of an integer register + UNSPEC_CC_TO_INT + ; GOT/PLT and lt-relative accesses UNSPEC_LTREL_OFFSET UNSPEC_LTREL_BASE @@ -138,6 +144,15 @@ ; Atomic Support UNSPECV_CAS UNSPECV_ATOMIC_OP + + ; Transactional Execution support + UNSPECV_TBEGIN + UNSPECV_TBEGINC + UNSPECV_TEND + UNSPECV_TABORT + UNSPECV_ETND + UNSPECV_NTSTG + UNSPECV_PPA ]) ;; @@ -191,6 +206,9 @@ (PFPO_OP1_TYPE_SHIFT 8) ]) +; Immediate operands for tbegin and tbeginc +(define_constants [(TBEGIN_MASK 65292)]) ; 0xff0c +(define_constants [(TBEGINC_MASK 65288)]) ; 0xff08 ;; Instruction operand type as used in the Principles of Operation. ;; Used to determine defaults for length and other attribute values. @@ -277,7 +295,8 @@ (define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12" (const (symbol_ref "s390_tune_attr"))) -(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12" +(define_attr "cpu_facility" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12" (const_string "standard")) (define_attr "enabled" "" @@ -304,6 +323,10 @@ (match_test "TARGET_DFP")) (const_int 1) + (and (eq_attr "cpu_facility" "cpu_zarch") + (match_test "TARGET_CPU_ZARCH")) + (const_int 1) + (and (eq_attr "cpu_facility" "z10") (match_test "TARGET_Z10")) (const_int 1) @@ -2246,7 +2269,7 @@ (define_insn "movcc" [(set (match_operand:CC 0 "nonimmediate_operand" "=d,c,d,d,d,R,T") - (match_operand:CC 1 "nonimmediate_operand" "d,d,c,R,T,d,d"))] + (match_operand:CC 1 "nonimmediate_operand" " d,d,c,R,T,d,d"))] "" "@ lr\t%0,%1 @@ -2578,7 +2601,7 @@ (use (reg:SI 0))]) (parallel [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_CCU_TO_INT)) + (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))])] "" { @@ -2690,7 +2713,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -2820,7 +2843,7 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_SIGNBIT_SET); @@ -2832,12 +2855,21 @@ (match_dup 2)] UNSPEC_TDC_INSN)) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))] + (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))] "TARGET_HARD_FLOAT" { operands[2] = GEN_INT (S390_TDC_INFINITY); }) +(define_insn_and_split "*cc_to_int" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand 1 "register_operand" "0")] + UNSPEC_CC_TO_INT))] + "operands != NULL" + "#" + "reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) + ; This insn is used to generate all variants of the Test Data Class ; instruction, namely tcxb, tcdb, and tceb. 
The insn's first operand ; is the register to be tested and the second one is the bit mask @@ -2853,14 +2885,6 @@ [(set_attr "op_type" "RXE") (set_attr "type" "fsimp<mode>")]) -(define_insn_and_split "*ccz_to_int" - [(set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_operand:CCZ 1 "register_operand" "0")] - UNSPEC_CCZ_TO_INT))] - "" - "#" - "reload_completed" - [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))]) ; @@ -2899,7 +2923,7 @@ "(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (match_operand:BLK 0 "memory_operand" "") @@ -3075,7 +3099,7 @@ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)" "#" [(set_attr "type" "cs") - (set_attr "cpu_facility" "*,*,z10,*")]) + (set_attr "cpu_facility" "*,*,z10,cpu_zarch")]) (define_split [(set (reg:CCU CC_REGNUM) @@ -3205,7 +3229,7 @@ (define_insn_and_split "cmpint" [(set (match_operand:SI 0 "register_operand" "=d") (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT)) + UNSPEC_STRCMPCC_TO_INT)) (clobber (reg:CC CC_REGNUM))] "" "#" @@ -3218,10 +3242,10 @@ (define_insn_and_split "*cmpint_cc" [(set (reg CC_REGNUM) (compare (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) + UNSPEC_STRCMPCC_TO_INT) (const_int 0))) (set (match_operand:SI 0 "register_operand" "=d") - (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT))] + (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT))] "s390_match_ccmode (insn, CCSmode)" "#" "&& reload_completed" @@ -3238,7 +3262,7 @@ (define_insn_and_split "*cmpint_sign" [(set (match_operand:DI 0 "register_operand" "=d") (sign_extend:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT))) + UNSPEC_STRCMPCC_TO_INT))) (clobber (reg:CC CC_REGNUM))] "TARGET_ZARCH" "#" @@ -3252,11 +3276,11 @@ [(set (reg CC_REGNUM) (compare (ashiftrt:DI (ashift:DI (subreg:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")] - UNSPEC_CCU_TO_INT) 0) + UNSPEC_STRCMPCC_TO_INT) 0) (const_int 32)) (const_int 32)) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=d") - (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT)))] + (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT)))] "s390_match_ccmode (insn, CCSmode) && TARGET_ZARCH" "#" "&& reload_completed" @@ -5507,7 +5531,7 @@ (if_then_else:GPR (match_operator 1 "s390_comparison" [(match_operand 2 "cc_reg_operand" " c,c, c, c, c, c, c") - (const_int 0)]) + (match_operand 5 "const_int_operand" "")]) (match_operand:GPR 3 "nonimmediate_operand" " d,0,QS, 0, d, 0,QS") (match_operand:GPR 4 "nonimmediate_operand" " 0,d, 0,QS, 0, d,QS")))] "TARGET_Z196" @@ -7907,7 +7931,8 @@ (define_insn "*cjump_64" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "TARGET_CPU_ZARCH" @@ -7926,7 +7951,8 @@ (define_insn "*cjump_31" [(set (pc) (if_then_else - (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)]) + (match_operator 1 "s390_comparison" [(reg CC_REGNUM) + (match_operand 2 "const_int_operand" "")]) (label_ref (match_operand 0 "" "")) (pc)))] "!TARGET_CPU_ZARCH" @@ -9795,3 +9821,217 @@ "cpsdr\t%0,%2,%1" [(set_attr "op_type" "RRF") (set_attr "type" "fsimp<mode>")]) + + +;; +;;- 
Transactional execution instructions +;; + +; This splitter helps combine to make use of CC directly when +; comparing the integer result of a tbegin builtin with a constant. +; The unspec is already removed by canonicalize_comparison. So this +; splitters only job is to turn the PARALLEL into separate insns +; again. Unfortunately this only works with the very first cc/int +; compare since combine is not able to deal with data flow across +; basic block boundaries. + +; It needs to be an insn pattern as well since combine does not apply +; the splitter directly. Combine would only use it if it actually +; would reduce the number of instructions. +(define_insn_and_split "*ccraw_to_int" + [(set (pc) + (if_then_else + (match_operator 0 "s390_eqne_operator" + [(reg:CCRAW CC_REGNUM) + (match_operand 1 "const_int_operand" "")]) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_operand:SI 3 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "" + "#" + "" + [(set (match_dup 3) + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT)) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCRAW CC_REGNUM) (match_dup 1)]) + (label_ref (match_dup 2)) + (pc)))] + "") + +; Non-constrained transaction begin + +(define_expand "tbegin" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, true); + DONE; +}) + +(define_expand "tbegin_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], NULL_RTX, false); + DONE; +}) + +(define_expand "tbegin_retry" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], true); + DONE; +}) + +(define_expand "tbegin_retry_nofloat" + [(match_operand:SI 0 "register_operand" "=d") + (match_operand:BLK 1 "memory_operand" "=Q") + (match_operand 2 "const_int_operand")] + "TARGET_HTM" +{ + s390_expand_tbegin (operands[0], operands[1], operands[2], false); + DONE; +}) + +(define_insn "tbegin_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN)) + (clobber (reg:DF 16)) + (clobber (reg:DF 17)) + (clobber (reg:DF 18)) + (clobber (reg:DF 19)) + (clobber (reg:DF 20)) + (clobber (reg:DF 21)) + (clobber (reg:DF 22)) + (clobber (reg:DF 23)) + (clobber (reg:DF 24)) + (clobber (reg:DF 25)) + (clobber (reg:DF 26)) + (clobber (reg:DF 27)) + (clobber (reg:DF 28)) + (clobber (reg:DF 29)) + (clobber (reg:DF 30)) + (clobber (reg:DF 31))] +; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is +; not supposed to be used for immediates (see genpreds.c). 
+ "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + +; Same as above but without the FPR clobbers +(define_insn "tbegin_nofloat_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q") + (match_operand 1 "const_int_operand" " D")] + UNSPECV_TBEGIN))] + "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff" + "tbegin\t%0,%x1" + [(set_attr "op_type" "SIL")]) + + +; Constrained transaction begin + +(define_expand "tbeginc" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int TBEGINC_MASK)] + UNSPECV_TBEGINC))] + "TARGET_HTM" + "") + +(define_insn "*tbeginc_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" " D")] + UNSPECV_TBEGINC))] + "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff" + "tbeginc\t0,%x0" + [(set_attr "op_type" "SIL")]) + +; Transaction end + +(define_expand "tend" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND)) + (set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))] + "TARGET_HTM" + "") + +(define_insn "*tend_1" + [(set (reg:CCRAW CC_REGNUM) + (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND))] + "TARGET_HTM" + "tend" + [(set_attr "op_type" "S")]) + +; Transaction abort + +(define_expand "tabort" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" +{ + if (CONST_INT_P (operands[0]) + && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 255) + { + error ("Invalid transaction abort code: " HOST_WIDE_INT_PRINT_DEC + ". Values in range 0 through 255 are reserved.", + INTVAL (operands[0])); + FAIL; + } +}) + +(define_insn "*tabort_1" + [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")] + UNSPECV_TABORT)] + "TARGET_HTM && operands != NULL" + "tabort\t%Y0" + [(set_attr "op_type" "S")]) + +; Transaction extract nesting depth + +(define_insn "etnd" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_ETND))] + "TARGET_HTM" + "etnd\t%0" + [(set_attr "op_type" "RRE")]) + +; Non-transactional store + +(define_insn "ntstg" + [(set (match_operand:DI 0 "memory_operand" "=RT") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "d")] + UNSPECV_NTSTG))] + "TARGET_HTM" + "ntstg\t%1,%0" + [(set_attr "op_type" "RXY")]) + +; Transaction perform processor assist + +(define_expand "tx_assist" + [(set (match_dup 1) (const_int 0)) + (unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_dup 1) + (const_int 1)] + UNSPECV_PPA)] + "TARGET_HTM" +{ + operands[1] = gen_reg_rtx (SImode); +}) + +(define_insn "*ppa" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "d") + (match_operand:SI 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "I")] + UNSPECV_PPA)] + "TARGET_HTM && INTVAL (operands[2]) < 16" + "ppa\t%0,%1,1" + [(set_attr "op_type" "RRF")]) diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index b326441173c..7dedb836701 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -104,6 +104,10 @@ mlong-double-64 Target Report RejectNegative Negative(mlong-double-128) InverseMask(LONG_DOUBLE_128) Use 64-bit long double +mhtm +Target Report Mask(OPT_HTM) +Use hardware transactional execution instructions + mpacked-stack Target Report Mask(PACKED_STACK) Use 
packed stack layout @@ -149,3 +153,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1) Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 1. + +mlra +Target Report Var(s390_lra_flag) Init(1) Save +Use LRA instead of reload diff --git a/gcc/config/s390/s390intrin.h b/gcc/config/s390/s390intrin.h new file mode 100644 index 00000000000..e1a00ce58e3 --- /dev/null +++ b/gcc/config/s390/s390intrin.h @@ -0,0 +1,33 @@ +/* S/390 System z specific intrinsics + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef _S390INTRIN_H +#define _S390INTRIN_H + +#ifndef __s390__ + #error s390intrin.h included on wrong platform/compiler +#endif + +#ifdef __HTM__ +#include <htmintrin.h> +#endif + + +#endif /* _S390INTRIN_H*/ diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index 25949c62d23..998ba7300ad 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -398,9 +398,13 @@ (define_predicate "general_extend_operand" (match_code "subreg,reg,mem,truncate") { - return (GET_CODE (op) == TRUNCATE - ? arith_operand - : nonimmediate_operand) (op, mode); + if (GET_CODE (op) == TRUNCATE) + return arith_operand (op, mode); + + if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))) + return general_movsrc_operand (op, mode); + + return nonimmediate_operand (op, mode); }) ;; Returns 1 if OP is a simple register address. @@ -468,17 +472,36 @@ return 0; } - if ((mode == QImode || mode == HImode) - && mode == GET_MODE (op) - && (MEM_P (op) - || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))) + if (mode == GET_MODE (op) + && (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))) { - rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0); + rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op); + rtx x = XEXP (mem_rtx, 0); - if (GET_CODE (x) == PLUS + if ((mode == QImode || mode == HImode) + && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false); + + /* Allow reg+reg addressing here without validating the register + numbers. Usually one of the regs must be R0 or a pseudo reg. + In some cases it can happen that arguments from hard regs are + propagated directly into address expressions. In this cases reload + will have to fix it up later. However, allow this only for native + 1, 2 or 4 byte addresses. */ + if (can_create_pseudo_p () && GET_CODE (x) == PLUS + && GET_MODE_SIZE (mode) <= 4 + && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) + return true; + + /* 'general_operand' does not allow volatile mems during RTL expansion to + avoid matching arithmetic that operates on mems, it seems. 
+ On SH this leads to redundant sign extensions for QImode or HImode + loads. Thus we mimic the behavior but allow volatile mems. */ + if (memory_address_addr_space_p (GET_MODE (mem_rtx), x, + MEM_ADDR_SPACE (mem_rtx))) + return true; } if (TARGET_SHMEDIA @@ -489,6 +512,7 @@ && GET_CODE (op) == SUBREG && GET_MODE (op) == mode && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op)) /* FIXME */ abort (); /* return 1; */ + return general_operand (op, mode); }) diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 5976206f8b4..60f45452036 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -19,12 +19,6 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* FIXME: This is a temporary hack, so that we can include <algorithm> - below. <algorithm> will try to include <cstdlib> which will reference - malloc & co, which are poisoned by "system.h". The proper solution is - to include <cstdlib> in "system.h" instead of <stdlib.h>. */ -#include <cstdlib> - #include "config.h" #include "system.h" #include "coretypes.h" diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 71ad1c1a2f6..8a140687654 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -12251,10 +12251,10 @@ label: ;; FMA (fused multiply-add) patterns (define_expand "fmasf4" - [(set (match_operand:SF 0 "fp_arith_reg_operand" "") - (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "") - (match_operand:SF 2 "fp_arith_reg_operand" "") - (match_operand:SF 3 "fp_arith_reg_operand" "")))] + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand") + (match_operand:SF 3 "fp_arith_reg_operand")))] "TARGET_SH2E || TARGET_SHMEDIA_FPU" { if (TARGET_SH2E) @@ -12285,6 +12285,43 @@ label: "fmac.s %1, %2, %0" [(set_attr "type" "fparith_media")]) +;; For some cases such as 'a * b + a' the FMA pattern is not generated by +;; previous transformations. If FMA is generally allowed, let the combine +;; pass utilize it. +(define_insn_and_split "*fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand"))] + "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac %1,%2,%0" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (fma:SF (match_dup 1) (match_dup 2) (match_dup 3))) + (use (match_dup 4))])] +{ + /* Change 'b * a + a' into 'a * b + a'. + This is better for register allocation. 
*/ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +} + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + (define_expand "divsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") (div:SF (match_operand:SF 1 "arith_reg_operand" "") diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c index 718134e2b90..7c7c429db3d 100644 --- a/gcc/config/sol2.c +++ b/gcc/config/sol2.c @@ -29,7 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "diagnostic-core.h" #include "ggc.h" -#include "hashtab.h" +#include "hash-table.h" tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis; @@ -157,10 +157,6 @@ solaris_assemble_visibility (tree decl, int vis ATTRIBUTE_UNUSED) #endif } -/* Hash table of group signature symbols. */ - -static htab_t solaris_comdat_htab; - /* Group section information entry stored in solaris_comdat_htab. */ typedef struct comdat_entry @@ -171,25 +167,34 @@ typedef struct comdat_entry const char *sig; } comdat_entry; -/* Helper routines for maintaining solaris_comdat_htab. */ +/* Helpers for maintaining solaris_comdat_htab. */ -static hashval_t -comdat_hash (const void *p) +struct comdat_entry_hasher : typed_noop_remove <comdat_entry> +{ + typedef comdat_entry value_type; + typedef comdat_entry compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline hashval_t +comdat_entry_hasher::hash (const value_type *entry) { - const comdat_entry *entry = (const comdat_entry *) p; - return htab_hash_string (entry->sig); } -static int -comdat_eq (const void *p1, const void *p2) +inline bool +comdat_entry_hasher::equal (const value_type *entry1, + const compare_type *entry2) { - const comdat_entry *entry1 = (const comdat_entry *) p1; - const comdat_entry *entry2 = (const comdat_entry *) p2; - return strcmp (entry1->sig, entry2->sig) == 0; } +/* Hash table of group signature symbols. */ + +static hash_table <comdat_entry_hasher> solaris_comdat_htab; + /* Output assembly to switch to COMDAT group section NAME with attributes FLAGS and group signature symbol DECL, using Sun as syntax. */ @@ -229,12 +234,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl) identify the missing ones without changing the affected frontents, remember the signature symbols and emit those not marked TREE_SYMBOL_REFERENCED in solaris_file_end. */ - if (solaris_comdat_htab == NULL) - solaris_comdat_htab = htab_create_alloc (37, comdat_hash, comdat_eq, NULL, - xcalloc, free); + if (!solaris_comdat_htab.is_created ()) + solaris_comdat_htab.create (37); entry.sig = signature; - slot = (comdat_entry **) htab_find_slot (solaris_comdat_htab, &entry, INSERT); + slot = solaris_comdat_htab.find_slot (&entry, INSERT); if (*slot == NULL) { @@ -250,10 +254,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl) /* Define unreferenced COMDAT group signature symbol corresponding to SLOT. 
*/ -static int -solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED) +int +solaris_define_comdat_signature (comdat_entry **slot, + void *aux ATTRIBUTE_UNUSED) { - comdat_entry *entry = *(comdat_entry **) slot; + comdat_entry *entry = *slot; tree decl = entry->decl; if (TREE_CODE (decl) != IDENTIFIER_NODE) @@ -277,10 +282,10 @@ solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED) void solaris_file_end (void) { - if (solaris_comdat_htab == NULL) + if (!solaris_comdat_htab.is_created ()) return; - htab_traverse (solaris_comdat_htab, solaris_define_comdat_signature, NULL); + solaris_comdat_htab.traverse <void *, solaris_define_comdat_signature> (NULL); } void diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 1dc4e3600a8..d473d6fdd7f 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -11527,7 +11527,7 @@ sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel) } /* Always perform the final addition/merge within the bmask insn. */ - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1)); } /* Implement TARGET_FRAME_POINTER_REQUIRED. */ @@ -11766,7 +11766,7 @@ static void vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode, enum machine_mode inner_mode) { - rtx t1, final_insn; + rtx t1, final_insn, sel; int bmask; t1 = gen_reg_rtx (mode); @@ -11792,8 +11792,8 @@ vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode, gcc_unreachable (); } - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode), - force_reg (SImode, GEN_INT (bmask)))); + sel = force_reg (SImode, GEN_INT (bmask)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx)); emit_insn (final_insn); } diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 292cb205271..7f8d4250502 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -5499,7 +5499,7 @@ [(set (match_operand:DF 0 "register_operand" "=e") (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f")) (float_extend:DF (match_operand:SF 2 "register_operand" "f"))))] - "(TARGET_V8 || TARGET_V9) && TARGET_FPU" + "(TARGET_V8 || TARGET_V9) && TARGET_FPU && !sparc_fix_ut699" "fsmuld\t%1, %2, %0" [(set_attr "type" "fpmul") (set_attr "fptype" "double")]) @@ -5528,20 +5528,37 @@ "fdivq\t%1, %2, %0" [(set_attr "type" "fpdivd")]) -(define_insn "divdf3" +(define_expand "divdf3" [(set (match_operand:DF 0 "register_operand" "=e") (div:DF (match_operand:DF 1 "register_operand" "e") (match_operand:DF 2 "register_operand" "e")))] "TARGET_FPU" + "") + +(define_insn "*divdf3_nofix" + [(set (match_operand:DF 0 "register_operand" "=e") + (div:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU && !sparc_fix_ut699" "fdivd\t%1, %2, %0" [(set_attr "type" "fpdivd") (set_attr "fptype" "double")]) +(define_insn "*divdf3_fix" + [(set (match_operand:DF 0 "register_operand" "=e") + (div:DF (match_operand:DF 1 "register_operand" "e") + (match_operand:DF 2 "register_operand" "e")))] + "TARGET_FPU && sparc_fix_ut699" + "fdivd\t%1, %2, %0\n\tstd\t%0, [%%sp-8]" + [(set_attr "type" "fpdivd") + (set_attr "fptype" "double") + (set_attr "length" "2")]) + (define_insn "divsf3" [(set (match_operand:SF 0 "register_operand" "=f") (div:SF (match_operand:SF 1 "register_operand" "f") (match_operand:SF 2 "register_operand" "f")))] - "TARGET_FPU" + "TARGET_FPU && !sparc_fix_ut699" "fdivs\t%1, %2, %0" 
[(set_attr "type" "fpdivs")]) @@ -5742,18 +5759,33 @@ "fsqrtq\t%1, %0" [(set_attr "type" "fpsqrtd")]) -(define_insn "sqrtdf2" +(define_expand "sqrtdf2" [(set (match_operand:DF 0 "register_operand" "=e") (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] "TARGET_FPU" + "") + +(define_insn "*sqrtdf2_nofix" + [(set (match_operand:DF 0 "register_operand" "=e") + (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && !sparc_fix_ut699" "fsqrtd\t%1, %0" [(set_attr "type" "fpsqrtd") (set_attr "fptype" "double")]) +(define_insn "*sqrtdf2_fix" + [(set (match_operand:DF 0 "register_operand" "=e") + (sqrt:DF (match_operand:DF 1 "register_operand" "e")))] + "TARGET_FPU && sparc_fix_ut699" + "fsqrtd\t%1, %0\n\tstd\t%0, [%%sp-8]" + [(set_attr "type" "fpsqrtd") + (set_attr "fptype" "double") + (set_attr "length" "2")]) + (define_insn "sqrtsf2" [(set (match_operand:SF 0 "register_operand" "=f") (sqrt:SF (match_operand:SF 1 "register_operand" "f")))] - "TARGET_FPU" + "TARGET_FPU && !sparc_fix_ut699" "fsqrts\t%1, %0" [(set_attr "type" "fpsqrts")]) @@ -8557,7 +8589,7 @@ mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4); sel = force_reg (SImode, gen_int_mode (mask, SImode)); - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); + emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx)); emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index 764c652e837..3b50c6c21f9 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -201,6 +201,10 @@ Target Report RejectNegative Var(sparc_fix_at697f) Enable workaround for single erratum of AT697F processor (corresponding to erratum #13 of AT697E processor) +mfix-ut699 +Target Report RejectNegative Var(sparc_fix_ut699) +Enable workarounds for the FP errata of the UT699 processor + Mask(LONG_DOUBLE_128) ;; Use 128-bit long double diff --git a/gcc/config/t-sol2 b/gcc/config/t-sol2 index 25b825017f6..142de89de95 100644 --- a/gcc/config/t-sol2 +++ b/gcc/config/t-sol2 @@ -34,5 +34,5 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h # Solaris-specific attributes sol2.o: $(srcdir)/config/sol2.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H) + tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H) $(HASH_TABLE_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< |