author     rus <rus@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-09 20:58:24 +0000
committer  rus <rus@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-09 20:58:24 +0000
commit     7f4db7c80779ecbc57d1146654daf0acfe18de66 (patch)
tree       3af522a3b5e149c3fd498ecb1255994daae2129a /gcc/config
parent     611349f0ec42a37591db2cd02974a11a48d10edb (diff)
download   gcc-7f4db7c80779ecbc57d1146654daf0acfe18de66.tar.gz

merge from trunk (branch: profile-stdlib)

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/profile-stdlib@154052 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/alpha/osf.h | 4
-rw-r--r--  gcc/config/arm/arm.c | 427
-rw-r--r--  gcc/config/arm/arm.h | 73
-rw-r--r--  gcc/config/arm/arm.md | 120
-rw-r--r--  gcc/config/arm/arm_neon.h | 100
-rw-r--r--  gcc/config/arm/bpabi.h | 6
-rw-r--r--  gcc/config/arm/cortex-a9.md | 186
-rw-r--r--  gcc/config/arm/fpa.md | 4
-rw-r--r--  gcc/config/arm/linux-eabi.h | 2
-rw-r--r--  gcc/config/arm/linux-elf.h | 2
-rw-r--r--  gcc/config/arm/neon-gen.ml | 3
-rw-r--r--  gcc/config/arm/neon.md | 3
-rw-r--r--  gcc/config/arm/neon.ml | 3
-rw-r--r--  gcc/config/arm/netbsd-elf.h | 2
-rw-r--r--  gcc/config/arm/thumb2.md | 10
-rw-r--r--  gcc/config/arm/unwind-arm.c | 1
-rw-r--r--  gcc/config/arm/vxworks.h | 2
-rw-r--r--  gcc/config/avr/avr.c | 27
-rw-r--r--  gcc/config/avr/avr.h | 2
-rw-r--r--  gcc/config/avr/avr.md | 52
-rw-r--r--  gcc/config/avr/avr.opt | 4
-rw-r--r--  gcc/config/bfin/bfin.h | 3
-rw-r--r--  gcc/config/cris/cris-protos.h | 2
-rw-r--r--  gcc/config/cris/cris.c | 41
-rw-r--r--  gcc/config/cris/cris.h | 27
-rw-r--r--  gcc/config/cris/cris.md | 6
-rw-r--r--  gcc/config/darwin.c | 11
-rw-r--r--  gcc/config/darwin10.h | 5
-rw-r--r--  gcc/config/fr30/fr30.h | 10
-rw-r--r--  gcc/config/frv/frv-protos.h | 1
-rw-r--r--  gcc/config/frv/frv.c | 38
-rw-r--r--  gcc/config/frv/frv.h | 53
-rw-r--r--  gcc/config/h8300/h8300.c | 60
-rw-r--r--  gcc/config/h8300/h8300.h | 11
-rw-r--r--  gcc/config/i386/cpuid.h | 4
-rw-r--r--  gcc/config/i386/cygming.h | 5
-rw-r--r--  gcc/config/i386/fma4intrin.h | 9
-rw-r--r--  gcc/config/i386/i386-c.c | 4
-rw-r--r--  gcc/config/i386/i386.c | 1082
-rw-r--r--  gcc/config/i386/i386.h | 11
-rw-r--r--  gcc/config/i386/i386.md | 3458
-rw-r--r--  gcc/config/i386/i386.opt | 8
-rw-r--r--  gcc/config/i386/ia32intrin.h | 2
-rw-r--r--  gcc/config/i386/linux.h | 5
-rw-r--r--  gcc/config/i386/linux64.h | 3
-rw-r--r--  gcc/config/i386/lwpintrin.h | 109
-rw-r--r--  gcc/config/i386/mingw.opt | 4
-rw-r--r--  gcc/config/i386/mingw32.h | 2
-rw-r--r--  gcc/config/i386/predicates.md | 4
-rw-r--r--  gcc/config/i386/sol2-unwind.h | 208
-rw-r--r--  gcc/config/i386/sol2.h | 2
-rw-r--r--  gcc/config/i386/sse.md | 1541
-rw-r--r--  gcc/config/i386/winnt-cxx.c | 160
-rw-r--r--  gcc/config/i386/winnt.c | 37
-rw-r--r--  gcc/config/i386/x86intrin.h | 16
-rw-r--r--  gcc/config/i386/xopintrin.h | 771
-rw-r--r--  gcc/config/ia64/ia64.c | 43
-rw-r--r--  gcc/config/m32c/m32c-protos.h | 3
-rw-r--r--  gcc/config/m32c/m32c.c | 35
-rw-r--r--  gcc/config/m32c/m32c.h | 6
-rw-r--r--  gcc/config/m68hc11/m68hc11.h | 3
-rw-r--r--  gcc/config/m68k/m68k.c | 47
-rw-r--r--  gcc/config/mep/mep.h | 2
-rw-r--r--  gcc/config/mips/iris.h | 3
-rw-r--r--  gcc/config/mips/mips-protos.h | 2
-rw-r--r--  gcc/config/mips/mips.c | 415
-rw-r--r--  gcc/config/mips/mips.h | 54
-rw-r--r--  gcc/config/mips/mips.md | 89
-rw-r--r--  gcc/config/mips/mips.opt | 4
-rw-r--r--  gcc/config/mips/predicates.md | 4
-rw-r--r--  gcc/config/mips/sdemtk.h | 4
-rw-r--r--  gcc/config/mn10300/mn10300-protos.h | 3
-rw-r--r--  gcc/config/mn10300/mn10300.c | 30
-rw-r--r--  gcc/config/mn10300/mn10300.h | 24
-rw-r--r--  gcc/config/moxie/moxie.h | 4
-rw-r--r--  gcc/config/pa/pa.md | 25
-rw-r--r--  gcc/config/pdp11/pdp11.h | 4
-rw-r--r--  gcc/config/picochip/picochip.h | 2
-rw-r--r--  gcc/config/rs6000/40x.md | 4
-rw-r--r--  gcc/config/rs6000/440.md | 4
-rw-r--r--  gcc/config/rs6000/476.md | 142
-rw-r--r--  gcc/config/rs6000/603.md | 4
-rw-r--r--  gcc/config/rs6000/6xx.md | 4
-rw-r--r--  gcc/config/rs6000/7450.md | 4
-rw-r--r--  gcc/config/rs6000/7xx.md | 4
-rw-r--r--  gcc/config/rs6000/8540.md | 4
-rw-r--r--  gcc/config/rs6000/a2.md | 134
-rw-r--r--  gcc/config/rs6000/altivec.md | 2
-rw-r--r--  gcc/config/rs6000/cell.md | 4
-rw-r--r--  gcc/config/rs6000/e300c2c3.md | 4
-rw-r--r--  gcc/config/rs6000/e500mc.md | 2
-rw-r--r--  gcc/config/rs6000/mpc.md | 4
-rw-r--r--  gcc/config/rs6000/option-defaults.h | 12
-rw-r--r--  gcc/config/rs6000/power4.md | 4
-rw-r--r--  gcc/config/rs6000/power5.md | 4
-rw-r--r--  gcc/config/rs6000/power6.md | 7
-rw-r--r--  gcc/config/rs6000/power7.md | 2
-rw-r--r--  gcc/config/rs6000/predicates.md | 2
-rw-r--r--  gcc/config/rs6000/rios1.md | 4
-rw-r--r--  gcc/config/rs6000/rios2.md | 4
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def | 990
-rw-r--r--  gcc/config/rs6000/rs6000.c | 253
-rw-r--r--  gcc/config/rs6000/rs6000.h | 999
-rw-r--r--  gcc/config/rs6000/rs6000.md | 16
-rw-r--r--  gcc/config/rs6000/rs6000.opt | 6
-rw-r--r--  gcc/config/rs6000/rs64.md | 4
-rw-r--r--  gcc/config/rs6000/t-fprules | 1
-rw-r--r--  gcc/config/rs6000/t-rs6000 | 3
-rw-r--r--  gcc/config/rs6000/vxworks.h | 4
-rw-r--r--  gcc/config/rx/constraints.md | 81
-rw-r--r--  gcc/config/rx/predicates.md | 288
-rw-r--r--  gcc/config/rx/rx-protos.h | 52
-rw-r--r--  gcc/config/rx/rx.c | 2517
-rw-r--r--  gcc/config/rx/rx.h | 659
-rw-r--r--  gcc/config/rx/rx.md | 1766
-rw-r--r--  gcc/config/rx/rx.opt | 98
-rw-r--r--  gcc/config/rx/t-rx | 32
-rw-r--r--  gcc/config/s390/2097.md | 34
-rw-r--r--  gcc/config/s390/fixdfdi.h | 462
-rw-r--r--  gcc/config/s390/libgcc-glibc.ver | 116
-rw-r--r--  gcc/config/s390/s390.c | 8
-rw-r--r--  gcc/config/s390/s390.h | 6
-rw-r--r--  gcc/config/s390/s390.md | 21
-rw-r--r--  gcc/config/s390/t-crtstuff | 5
-rw-r--r--  gcc/config/s390/t-linux | 3
-rw-r--r--  gcc/config/s390/t-linux64 | 5
-rw-r--r--  gcc/config/s390/t-tpf | 9
-rw-r--r--  gcc/config/s390/tpf.h | 2
-rw-r--r--  gcc/config/score/score.h | 3
-rw-r--r--  gcc/config/sh/sh.c | 207
-rw-r--r--  gcc/config/sh/sh.md | 13
-rw-r--r--  gcc/config/sparc/sol2-unwind.h | 458
-rw-r--r--  gcc/config/sparc/sol2.h | 2
-rw-r--r--  gcc/config/spu/cache.S | 43
-rw-r--r--  gcc/config/spu/cachemgr.c | 438
-rw-r--r--  gcc/config/spu/spu-c.c | 11
-rw-r--r--  gcc/config/spu/spu-elf.h | 10
-rw-r--r--  gcc/config/spu/spu.c | 508
-rw-r--r--  gcc/config/spu/spu.h | 20
-rw-r--r--  gcc/config/spu/spu.opt | 21
-rw-r--r--  gcc/config/spu/spu_cache.h | 39
-rw-r--r--  gcc/config/spu/t-spu-elf | 33
-rw-r--r--  gcc/config/stormy16/stormy16.h | 2
-rw-r--r--  gcc/config/vax/linux.h | 12
144 files changed, 15397 insertions, 4774 deletions
diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h
index 2b5165c0754..81c12aa14fc 100644
--- a/gcc/config/alpha/osf.h
+++ b/gcc/config/alpha/osf.h
@@ -167,10 +167,6 @@ __enable_execute_stack (void *addr) \
#define LD_INIT_SWITCH "-init"
#define LD_FINI_SWITCH "-fini"
-/* The linker needs a space after "-o". This allows -oldstyle_liblookup to
- be passed to ld. */
-#define SWITCHES_NEED_SPACES "o"
-
/* Select a format to encode pointers in exception handling data. CODE
is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
true if the symbol may be affected by dynamic relocations.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index cd5a0ed1403..4c7fcb65854 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -133,11 +133,12 @@ static enum machine_mode arm_promote_function_mode (const_tree,
const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
-static rtx arm_libcall_value (enum machine_mode, rtx);
+static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
+static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
@@ -445,6 +446,9 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_HAVE_TLS true
#endif
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
@@ -520,14 +524,11 @@ enum processor_type arm_tune = arm_none;
/* The default processor used if not overridden by commandline. */
static enum processor_type arm_default_cpu = arm_none;
-/* Which floating point model to use. */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. */
-enum fputype arm_fpu_arch;
-
/* Which floating point hardware to schedule for. */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware. */
enum float_abi_type arm_float_abi;
@@ -805,46 +806,21 @@ static struct arm_cpu_select arm_select[] =
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
-struct fpu_desc
-{
- const char * name;
- enum fputype fpu;
-};
-
-
/* Available values for -mfpu=. */
-static const struct fpu_desc all_fpus[] =
-{
- {"fpa", FPUTYPE_FPA},
- {"fpe2", FPUTYPE_FPA_EMU2},
- {"fpe3", FPUTYPE_FPA_EMU2},
- {"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP},
- {"vfp3", FPUTYPE_VFP3},
- {"vfpv3", FPUTYPE_VFP3},
- {"vfpv3-d16", FPUTYPE_VFP3D16},
- {"neon", FPUTYPE_NEON},
- {"neon-fp16", FPUTYPE_NEON_FP16}
-};
-
-
-/* Floating point models used by the different hardware.
- See fputype in arm.h. */
-
-static const enum arm_fp_model fp_model_for_fpu[] =
-{
- /* No FP hardware. */
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
@@ -1298,13 +1274,6 @@ arm_override_options (void)
enum processor_type target_arch_cpu = arm_none;
enum processor_type selected_cpu = arm_none;
- /* Ideally we would want to use CFI directives to generate
- debug info. However this also creates the .eh_frame
- section, so disable them until GAS can handle
- this properly. See PR40521. */
- if (TARGET_AAPCS_BASED)
- flag_dwarf2_cfi_asm = 0;
-
/* Set up the flags based on the cpu/architecture selected by the user. */
for (i = ARRAY_SIZE (arm_select); i--;)
{
@@ -1618,7 +1587,6 @@ arm_override_options (void)
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
error ("iwmmxt abi requires an iwmmxt capable cpu");
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
if (target_fpu_name == NULL && target_fpe_name != NULL)
{
if (streq (target_fpe_name, "2"))
@@ -1629,46 +1597,52 @@ arm_override_options (void)
error ("invalid floating point emulation option: -mfpe=%s",
target_fpe_name);
}
- if (target_fpu_name != NULL)
- {
- /* The user specified a FPU. */
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
- {
- if (streq (all_fpus[i].name, target_fpu_name))
- {
- arm_fpu_arch = all_fpus[i].fpu;
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- break;
- }
- }
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
- }
- else
+
+ if (target_fpu_name == NULL)
{
#ifdef FPUTYPE_DEFAULT
- /* Use the default if it is specified for this platform. */
- arm_fpu_arch = FPUTYPE_DEFAULT;
- arm_fpu_tune = FPUTYPE_DEFAULT;
+ target_fpu_name = FPUTYPE_DEFAULT;
#else
- /* Pick one based on CPU type. */
- /* ??? Some targets assume FPA is the default.
- if ((insn_flags & FL_VFP) != 0)
- arm_fpu_arch = FPUTYPE_VFP;
- else
- */
if (arm_arch_cirrus)
- arm_fpu_arch = FPUTYPE_MAVERICK;
+ target_fpu_name = "maverick";
else
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
+ target_fpu_name = "fpe2";
#endif
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
- arm_fpu_tune = FPUTYPE_FPA;
+ }
+
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+ {
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+ if (!arm_fpu_desc)
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
else
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable();
}
if (target_float_abi_name != NULL)
@@ -1690,7 +1664,7 @@ arm_override_options (void)
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
if (TARGET_AAPCS_BASED
- && (arm_fp_model == ARM_FP_MODEL_FPA))
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
error ("FPA is unsupported in the AAPCS");
if (TARGET_AAPCS_BASED)
@@ -1718,7 +1692,7 @@ arm_override_options (void)
/* If soft-float is specified then don't use FPU. */
if (TARGET_SOFT_FLOAT)
- arm_fpu_arch = FPUTYPE_NONE;
+ arm_fpu_attr = FPU_NONE;
if (TARGET_AAPCS_BASED)
{
@@ -1745,8 +1719,7 @@ arm_override_options (void)
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+ || (TARGET_FPA && arm_fpu_desc->rev))
&& (tune_flags & FL_MODE32) == 0)
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
@@ -1871,6 +1844,23 @@ arm_override_options (void)
max_insns_skipped = 3;
}
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
+ /* Ideally we would want to use CFI directives to generate
+ debug info. However this also creates the .eh_frame
+ section, so disable them until GAS can handle
+ this properly. See PR40521. */
+ if (TARGET_AAPCS_BASED)
+ flag_dwarf2_cfi_asm = 0;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
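
As a worked reading of the new -mfpu= handling above (using only entries from the all_fpus table in this patch), -mfpu=neon now selects a single descriptor rather than an FPUTYPE_* enumerator:

    /* Lookup result for -mfpu=neon, per the table above:
         { "neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, false }
       so arm_fpu_desc->model is ARM_FP_MODEL_VFP and, given the arm.h
       changes later in this patch, TARGET_VFP, TARGET_VFP3,
       TARGET_VFPD32 and TARGET_NEON can all hold on a suitable core.  */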
@@ -2393,20 +2383,24 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1);
}
-/* Return the number of ARM instructions required to synthesize the given
- constant. */
+/* Return the number of instructions required to synthesize the given
+ constant, if we start emitting them from bit-position I. */
static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
HOST_WIDE_INT temp1;
+ int step_size = TARGET_ARM ? 2 : 1;
int num_insns = 0;
+
+ gcc_assert (TARGET_ARM || i == 0);
+
do
{
int end;
if (i <= 0)
i += 32;
- if (remainder & (3 << (i - 2)))
+ if (remainder & (((1 << step_size) - 1) << (i - step_size)))
{
end = i - 8;
if (end < 0)
@@ -2415,13 +2409,77 @@ count_insns_for_constant (HOST_WIDE_INT remainder, int i)
| ((i < end) ? (0xff >> (32 - end)) : 0));
remainder &= ~temp1;
num_insns++;
- i -= 6;
+ i -= 8 - step_size;
}
- i -= 2;
+ i -= step_size;
} while (remainder);
return num_insns;
}
+static int
+find_best_start (unsigned HOST_WIDE_INT remainder)
+{
+ int best_consecutive_zeros = 0;
+ int i;
+ int best_start = 0;
+
+ /* If we aren't targeting ARM, the best place to start is always at
+ the bottom. */
+ if (! TARGET_ARM)
+ return 0;
+
+ for (i = 0; i < 32; i += 2)
+ {
+ int consecutive_zeros = 0;
+
+ if (!(remainder & (3 << i)))
+ {
+ while ((i < 32) && !(remainder & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
+ }
+
+ /* So long as it won't require any more insns to do so, it's
+ desirable to emit a small constant (in bits 0...9) in the last
+ insn. This way there is more chance that it can be combined with
+ a later addressing insn to form a pre-indexed load or store
+ operation. Consider:
+
+ *((volatile int *)0xe0000100) = 1;
+ *((volatile int *)0xe0000110) = 2;
+
+ We want this to wind up as:
+
+ mov rA, #0xe0000000
+ mov rB, #1
+ str rB, [rA, #0x100]
+ mov rB, #2
+ str rB, [rA, #0x110]
+
+ rather than having to synthesize both large constants from scratch.
+
+ Therefore, we calculate how many insns would be required to emit
+ the constant starting from `best_start', and also starting from
+ zero (i.e. with bit 31 first to be output). If `best_start' doesn't
+ yield a shorter sequence, we may as well use zero. */
+ if (best_start != 0
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+ && (count_insns_for_constant (remainder, 0) <=
+ count_insns_for_constant (remainder, best_start)))
+ best_start = 0;
+
+ return best_start;
+}
+
/* Emit an instruction with the indicated PATTERN. If COND is
non-NULL, conditionalize the execution of the instruction on COND
being true. */
@@ -2445,6 +2503,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
{
int can_invert = 0;
int can_negate = 0;
+ int final_invert = 0;
int can_negate_initial = 0;
int can_shift = 0;
int i;
@@ -2456,6 +2515,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
int insns = 0;
unsigned HOST_WIDE_INT temp1, temp2;
unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+ int step_size = TARGET_ARM ? 2 : 1;
/* Find out which operations are safe for a given CODE. Also do a quick
check for degenerate cases; these can occur when DImode operations
@@ -2529,14 +2589,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
return 1;
}
- /* We don't know how to handle other cases yet. */
- gcc_assert (remainder == 0xffffffff);
-
- if (generate)
- emit_constant_insn (cond,
- gen_rtx_SET (VOIDmode, target,
- gen_rtx_NOT (mode, source)));
- return 1;
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ return 1;
+ }
+ break;
case MINUS:
/* We treat MINUS as (val - source), since (source - val) is always
@@ -2987,9 +3048,25 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
if ((code == AND)
|| (code != IOR && can_invert && num_bits_set > 16))
- remainder = (~remainder) & 0xffffffff;
+ remainder ^= 0xffffffff;
else if (code == PLUS && num_bits_set > 16)
remainder = (-remainder) & 0xffffffff;
+
+ /* For XOR, if more than half the bits are set and there's a sequence
+ of more than 8 consecutive ones in the pattern then we can XOR by the
+ inverted constant and then invert the final result; this may save an
+ instruction and might also lead to the final mvn being merged with
+ some other operation. */
+ else if (code == XOR && num_bits_set > 16
+ && (count_insns_for_constant (remainder ^ 0xffffffff,
+ find_best_start
+ (remainder ^ 0xffffffff))
+ < count_insns_for_constant (remainder,
+ find_best_start (remainder))))
+ {
+ remainder ^= 0xffffffff;
+ final_invert = 1;
+ }
else
{
can_invert = 0;
@@ -3008,63 +3085,8 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
/* ??? Use thumb2 replicated constants when the high and low halfwords are
the same. */
{
- int best_start = 0;
- if (!TARGET_THUMB2)
- {
- int best_consecutive_zeros = 0;
-
- for (i = 0; i < 32; i += 2)
- {
- int consecutive_zeros = 0;
-
- if (!(remainder & (3 << i)))
- {
- while ((i < 32) && !(remainder & (3 << i)))
- {
- consecutive_zeros += 2;
- i += 2;
- }
- if (consecutive_zeros > best_consecutive_zeros)
- {
- best_consecutive_zeros = consecutive_zeros;
- best_start = i - consecutive_zeros;
- }
- i -= 2;
- }
- }
-
- /* So long as it won't require any more insns to do so, it's
- desirable to emit a small constant (in bits 0...9) in the last
- insn. This way there is more chance that it can be combined with
- a later addressing insn to form a pre-indexed load or store
- operation. Consider:
-
- *((volatile int *)0xe0000100) = 1;
- *((volatile int *)0xe0000110) = 2;
-
- We want this to wind up as:
-
- mov rA, #0xe0000000
- mov rB, #1
- str rB, [rA, #0x100]
- mov rB, #2
- str rB, [rA, #0x110]
-
- rather than having to synthesize both large constants from scratch.
-
- Therefore, we calculate how many insns would be required to emit
- the constant starting from `best_start', and also starting from
- zero (i.e. with bit 31 first to be output). If `best_start' doesn't
- yield a shorter sequence, we may as well use zero. */
- if (best_start != 0
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
- && (count_insns_for_constant (remainder, 0) <=
- count_insns_for_constant (remainder, best_start)))
- best_start = 0;
- }
-
/* Now start emitting the insns. */
- i = best_start;
+ i = find_best_start (remainder);
do
{
int end;
@@ -3092,7 +3114,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
}
else
{
- if (remainder && subtargets)
+ if ((final_invert || remainder) && subtargets)
new_src = gen_reg_rtx (mode);
else
new_src = target;
@@ -3127,21 +3149,23 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
code = PLUS;
insns++;
- if (TARGET_ARM)
- i -= 6;
- else
- i -= 7;
+ i -= 8 - step_size;
}
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
shifts. */
- if (TARGET_ARM)
- i -= 2;
- else
- i--;
+ i -= step_size;
}
while (remainder);
}
+ if (final_invert)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ insns++;
+ }
+
return insns;
}
@@ -3264,7 +3288,7 @@ add_libcall (htab_t htab, rtx libcall)
}
static bool
-arm_libcall_uses_aapcs_base (rtx libcall)
+arm_libcall_uses_aapcs_base (const_rtx libcall)
{
static bool init_done = false;
static htab_t libcall_htab;
@@ -3311,7 +3335,7 @@ arm_libcall_uses_aapcs_base (rtx libcall)
}
rtx
-arm_libcall_value (enum machine_mode mode, rtx libcall)
+arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
&& GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -6201,7 +6225,7 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
else if ((outer == PLUS || outer == COMPARE)
&& INTVAL (x) < 256 && INTVAL (x) > -256)
return 0;
- else if (outer == AND
+ else if ((outer == IOR || outer == XOR || outer == AND)
&& INTVAL (x) < 256 && INTVAL (x) >= -256)
return COSTS_N_INSNS (1);
else if (outer == ASHIFT || outer == ASHIFTRT
@@ -12269,7 +12293,7 @@ output_move_neon (rtx *operands)
{
/* We're only using DImode here because it's a convenient size. */
ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
- ops[1] = adjust_address (mem, SImode, 8 * i);
+ ops[1] = adjust_address (mem, DImode, 8 * i);
if (reg_overlap_mentioned_p (ops[0], mem))
{
gcc_assert (overlap == -1);
@@ -13257,7 +13281,7 @@ arm_output_epilogue (rtx sibling)
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
int vfp_offset = offsets->frame;
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -13480,7 +13504,7 @@ arm_output_epilogue (rtx sibling)
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
}
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -14206,7 +14230,7 @@ arm_save_coproc_regs(void)
/* Save any floating point call-saved registers used by this
function. */
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -19688,45 +19712,8 @@ arm_file_start (void)
}
else
{
- int set_float_abi_attributes = 0;
- switch (arm_fpu_arch)
- {
- case FPUTYPE_FPA:
- fpu_name = "fpa";
- break;
- case FPUTYPE_FPA_EMU2:
- fpu_name = "fpe2";
- break;
- case FPUTYPE_FPA_EMU3:
- fpu_name = "fpe3";
- break;
- case FPUTYPE_MAVERICK:
- fpu_name = "maverick";
- break;
- case FPUTYPE_VFP:
- fpu_name = "vfp";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3D16:
- fpu_name = "vfpv3-d16";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3:
- fpu_name = "vfpv3";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON:
- fpu_name = "neon";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON_FP16:
- fpu_name = "neon-fp16";
- set_float_abi_attributes = 1;
- break;
- default:
- abort();
- }
- if (set_float_abi_attributes)
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
if (TARGET_HARD_FLOAT)
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
@@ -21173,4 +21160,12 @@ arm_frame_pointer_required (void)
|| (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
+/* Only thumb1 can't support conditional execution, so return true if
+ the target is not thumb1. */
+static bool
+arm_have_conditional_execution (void)
+{
+ return !TARGET_THUMB1;
+}
+
#include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 9272ca51cba..2dfd22df45c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -190,9 +190,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT)
/* Use hardware floating point calling convention. */
#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD)
-#define TARGET_FPA (arm_fp_model == ARM_FP_MODEL_FPA)
-#define TARGET_MAVERICK (arm_fp_model == ARM_FP_MODEL_MAVERICK)
-#define TARGET_VFP (arm_fp_model == ARM_FP_MODEL_VFP)
+#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA)
+#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK)
+#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
#define TARGET_IWMMXT (arm_arch_iwmmxt)
#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT)
#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT)
@@ -216,6 +216,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2)
/* Thumb-1 only. */
#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm)
+/* FPA emulator without LFM. */
+#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2)
/* The following two macros concern the ability to execute coprocessor
instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently
@@ -223,27 +225,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
to be more careful with TARGET_NEON as noted below. */
/* FPU has the full VFPv3/NEON register file of 32 D registers. */
-#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_VFP3 \
- || arm_fpu_arch == FPUTYPE_NEON \
- || arm_fpu_arch == FPUTYPE_NEON_FP16))
+#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32)
/* FPU supports VFPv3 instructions. */
-#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_VFP3D16 \
- || TARGET_VFPD32))
+#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3)
/* FPU supports NEON/VFP half-precision floating-point. */
-#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16)
+#define TARGET_NEON_FP16 \
+ (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16)
/* FPU supports Neon instructions. The setting of this macro gets
revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT
and TARGET_HARD_FLOAT to ensure that NEON instructions are
available. */
#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \
- && arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_NEON \
- || arm_fpu_arch == FPUTYPE_NEON_FP16))
+ && TARGET_VFP && arm_fpu_desc->neon)
/* "DSP" multiply instructions, eg. SMULxy. */
#define TARGET_DSP_MULTIPLY \
@@ -300,42 +296,25 @@ enum arm_fp_model
ARM_FP_MODEL_VFP
};
-extern enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. Also update
- fp_model_for_fpu in arm.c when adding entries to this list. */
-enum fputype
+enum vfp_reg_type
{
- /* No FP hardware. */
- FPUTYPE_NONE,
- /* Full FPA support. */
- FPUTYPE_FPA,
- /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */
- FPUTYPE_FPA_EMU2,
- /* Emulated FPA hardware, Issue 3 emulator. */
- FPUTYPE_FPA_EMU3,
- /* Cirrus Maverick floating point co-processor. */
- FPUTYPE_MAVERICK,
- /* VFP. */
- FPUTYPE_VFP,
- /* VFPv3-D16. */
- FPUTYPE_VFP3D16,
- /* VFPv3. */
- FPUTYPE_VFP3,
- /* Neon. */
- FPUTYPE_NEON,
- /* Neon with half-precision float extensions. */
- FPUTYPE_NEON_FP16
+ VFP_REG_D16,
+ VFP_REG_D32,
+ VFP_REG_SINGLE
};
-/* Recast the floating point class to be the floating point attribute. */
-#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune)
-
-/* What type of floating point to tune for */
-extern enum fputype arm_fpu_tune;
-
-/* What type of floating point instructions are available */
-extern enum fputype arm_fpu_arch;
+extern const struct arm_fpu_desc
+{
+ const char *name;
+ enum arm_fp_model model;
+ int rev;
+ enum vfp_reg_type regs;
+ int neon;
+ int fp16;
+} *arm_fpu_desc;
+
+/* Which floating point hardware to schedule for. */
+extern int arm_fpu_attr;
enum float_abi_type
{
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e180c2f08f1..52edcbaa17b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -160,7 +160,7 @@
; Floating Point Unit. If we only have floating point emulation, then there
; is no point in scheduling the floating point insns. (Well, for best
; performance we should try and group them together).
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16"
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
(const (symbol_ref "arm_fpu_attr")))
; LENGTH of an instruction (in bytes)
@@ -392,6 +392,9 @@
; registers.
(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF])
+;; The integer modes up to word size
+(define_mode_iterator QHSI [QI HI SI])
+
;;---------------------------------------------------------------------------
;; Predicates
@@ -1914,7 +1917,16 @@
else /* TARGET_THUMB1 */
{
if (GET_CODE (operands[2]) != CONST_INT)
- operands[2] = force_reg (SImode, operands[2]);
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
else
{
int i;
@@ -2623,7 +2635,16 @@
DONE;
}
else /* TARGET_THUMB1 */
- operands [2] = force_reg (SImode, operands [2]);
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
}
"
)
@@ -2731,12 +2752,29 @@
(define_expand "xorsi3"
[(set (match_operand:SI 0 "s_register_operand" "")
(xor:SI (match_operand:SI 1 "s_register_operand" "")
- (match_operand:SI 2 "arm_rhs_operand" "")))]
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_EITHER"
- "if (TARGET_THUMB1)
- if (GET_CODE (operands[2]) == CONST_INT)
- operands[2] = force_reg (SImode, operands[2]);
- "
+ "if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (TARGET_32BIT)
+ {
+ arm_split_constant (XOR, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0], operands[1],
+ optimize && can_create_pseudo_p ());
+ DONE;
+ }
+ else /* TARGET_THUMB1 */
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
+ }"
)
(define_insn "*arm_xorsi3"
@@ -5813,6 +5851,11 @@
{
rtx reg = gen_reg_rtx (SImode);
+ /* For thumb we want an unsigned immediate, then we are more likely
+ to be able to use a movs insn. */
+ if (TARGET_THUMB)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
+
emit_insn (gen_movsi (reg, operands[1]));
operands[1] = gen_lowpart (QImode, reg);
}
@@ -6727,6 +6770,7 @@
(const_int 6)
(const_int 8))))]
)
+
(define_insn "*movsi_cbranchsi4"
[(set (pc)
(if_then_else
@@ -6790,6 +6834,45 @@
(const_int 10)))))]
)
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 1) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(parallel
+ [(set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 0) (match_dup 1))])]
+ ""
+)
+
+;; Sigh! This variant shouldn't be needed, but combine often fails to
+;; merge cases like this because the op1 is a hard register in
+;; CLASS_LIKELY_SPILLED_P.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(parallel
+ [(set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 0) (match_dup 1))])]
+ ""
+)
+
(define_insn "*negated_cbranchsi4"
[(set (pc)
(if_then_else
@@ -8033,15 +8116,13 @@
if (!thumb1_cmp_operand (op3, SImode))
op3 = force_reg (SImode, op3);
scratch = gen_reg_rtx (SImode);
- emit_insn (gen_cstoresi_nltu_thumb1 (scratch, operands[2], op3));
- emit_insn (gen_negsi2 (operands[0], scratch));
+ emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3));
break;
case GTU:
op3 = force_reg (SImode, operands[3]);
scratch = gen_reg_rtx (SImode);
- emit_insn (gen_cstoresi_nltu_thumb1 (scratch, op3, operands[2]));
- emit_insn (gen_negsi2 (operands[0], scratch));
+ emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], op3, operands[2]));
break;
/* No good sequences for GT, LT. */
@@ -8125,6 +8206,7 @@
[(set_attr "length" "4")]
)
+;; Used as part of the expansion of thumb ltu and gtu sequences
(define_insn "cstoresi_nltu_thumb1"
[(set (match_operand:SI 0 "s_register_operand" "=l,l")
(neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
@@ -8134,6 +8216,20 @@
[(set_attr "length" "4")]
)
+(define_insn_and_split "cstoresi_ltu_thumb1"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l")
+ (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))]
+ "TARGET_THUMB1"
+ "#"
+ "TARGET_THUMB1"
+ [(set (match_dup 3)
+ (neg:SI (ltu:SI (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (neg:SI (match_dup 3)))]
+ "operands[3] = gen_reg_rtx (SImode);"
+ [(set_attr "length" "4")]
+)
+
;; Used as part of the expansion of thumb les sequence.
(define_insn "thumb1_addsi3_addgeu"
[(set (match_operand:SI 0 "s_register_operand" "=l")
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index faaaf7bca39..ccfc7426077 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -61,7 +61,7 @@ typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16)));
typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
-typedef __builtin_neon_sf float32_t;
+typedef float float32_t;
typedef __builtin_neon_poly8 poly8_t;
typedef __builtin_neon_poly16 poly16_t;
@@ -5085,7 +5085,7 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
{
- return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c);
+ return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5151,7 +5151,7 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
{
- return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c);
+ return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -5283,7 +5283,7 @@ vdup_n_s32 (int32_t __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5349,7 +5349,7 @@ vdupq_n_s32 (int32_t __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -5415,7 +5415,7 @@ vmov_n_s32 (int32_t __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmov_n_f32 (float32_t __a)
{
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5481,7 +5481,7 @@ vmovq_n_s32 (int32_t __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -6591,7 +6591,7 @@ vmul_n_s32 (int32x2_t __a, int32_t __b)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t __a, float32_t __b)
{
- return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3);
+ return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6621,7 +6621,7 @@ vmulq_n_s32 (int32x4_t __a, int32_t __b)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t __a, float32_t __b)
{
- return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3);
+ return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -6735,7 +6735,7 @@ vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
- return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3);
+ return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6765,7 +6765,7 @@ vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
- return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3);
+ return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -6831,7 +6831,7 @@ vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
- return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3);
+ return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6861,7 +6861,7 @@ vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
- return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3);
+ return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -7851,7 +7851,7 @@ vld1_s64 (const int64_t * __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * __a)
{
- return (float32x2_t)__builtin_neon_vld1v2sf (__a);
+ return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -7917,7 +7917,7 @@ vld1q_s64 (const int64_t * __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * __a)
{
- return (float32x4_t)__builtin_neon_vld1v4sf (__a);
+ return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -7977,7 +7977,7 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
{
- return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c);
+ return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -8043,7 +8043,7 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
{
- return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c);
+ return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -8109,7 +8109,7 @@ vld1_dup_s32 (const int32_t * __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * __a)
{
- return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a);
+ return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -8175,7 +8175,7 @@ vld1q_dup_s32 (const int32_t * __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * __a)
{
- return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a);
+ return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -8247,7 +8247,7 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t * __a, float32x2_t __b)
{
- __builtin_neon_vst1v2sf (__a, __b);
+ __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8313,7 +8313,7 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * __a, float32x4_t __b)
{
- __builtin_neon_vst1v4sf (__a, __b);
+ __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8373,7 +8373,7 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
{
- __builtin_neon_vst1_lanev2sf (__a, __b, __c);
+ __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8439,7 +8439,7 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
{
- __builtin_neon_vst1_lanev4sf (__a, __b, __c);
+ __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8512,7 +8512,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2v2sf (__a);
+ __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8600,7 +8600,7 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld2v4sf (__a);
+ __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8676,7 +8676,7 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -8748,7 +8748,7 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -8807,7 +8807,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8892,7 +8892,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t __b)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v2sf (__a, __bu.__o);
+ __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8969,7 +8969,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v4sf (__a, __bu.__o);
+ __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9032,7 +9032,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9088,7 +9088,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9140,7 +9140,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3v2sf (__a);
+ __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9228,7 +9228,7 @@ __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
- __rv.__o = __builtin_neon_vld3v4sf (__a);
+ __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9304,7 +9304,7 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -9376,7 +9376,7 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
- __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -9435,7 +9435,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9520,7 +9520,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t __b)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v2sf (__a, __bu.__o);
+ __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9597,7 +9597,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v4sf (__a, __bu.__o);
+ __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9660,7 +9660,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9716,7 +9716,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9768,7 +9768,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4v2sf (__a);
+ __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9856,7 +9856,7 @@ __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
- __rv.__o = __builtin_neon_vld4v4sf (__a);
+ __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9932,7 +9932,7 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -10004,7 +10004,7 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -10063,7 +10063,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -10148,7 +10148,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t __b)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst4v2sf (__a, __bu.__o);
+ __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10225,7 +10225,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
- __builtin_neon_vst4v4sf (__a, __bu.__o);
+ __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10288,7 +10288,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10344,7 +10344,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
- __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
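
The float32_t change at the top of this file means user code now sees a plain C float, while the added casts keep the builtins' __builtin_neon_sf signatures unchanged. A minimal usage sketch (the function is hypothetical; assumes a NEON-enabled build, e.g. -mfpu=neon):

    #include <arm_neon.h>

    /* Scale each lane of a two-lane vector by a scalar; s is an
       ordinary float now that float32_t is defined as float.  */
    float32x2_t scale_v2 (float32x2_t v, float s)
    {
      return vmul_n_f32 (v, s);
    }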
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index bc0c62f401e..ba206022b75 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -30,7 +30,7 @@
/* Section 4.1 of the AAPCS requires the use of VFP format. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
/* TARGET_BIG_ENDIAN_DEFAULT is set in
config.gcc for big endian configurations. */
@@ -53,6 +53,8 @@
#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}"
+#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a8|mcpu=cortex-a9:%{!r:--be8}}}"
+
/* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC
#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
@@ -65,7 +67,7 @@
#define BPABI_LINK_SPEC \
"%{mbig-endian:-EB} %{mlittle-endian:-EL} " \
"%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \
- "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC
+ "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC
#undef LINK_SPEC
#define LINK_SPEC BPABI_LINK_SPEC
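
Reading the new BE8_LINK_SPEC above (behaviour hedged, the command line is illustrative): a big-endian link targeting armv7-a, Cortex-A8 or Cortex-A9, such as arm-eabi-gcc -mbig-endian -march=armv7-a main.o, should now pass --be8 to the linker, while relocatable links (-r) are left untouched.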
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
index 121fd2da747..d1ad7cba767 100644
--- a/gcc/config/arm/cortex-a9.md
+++ b/gcc/config/arm/cortex-a9.md
@@ -1,6 +1,8 @@
-;; ARM Cortex-A9 VFP pipeline description
-;; Copyright (C) 2008 Free Software Foundation, Inc.
-;; Written by CodeSourcery.
+;; ARM Cortex-A9 pipeline description
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Originally written by CodeSourcery for VFP.
+;;
+;; Integer core pipeline description contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
@@ -20,9 +22,181 @@
(define_automaton "cortex_a9")
-;; FIXME: We model a single pipeline for all instructions.
-;; Is dual-issue possible, and do we have other pipelines?
-(define_cpu_unit "cortex_a9_vfp" "cortex_a9")
+;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has
+;; the following components.
+;; 1. 1 Load Store Pipeline.
+;; 2. P0 / main pipeline for data processing instructions.
+;; 3. P1 / Dual pipeline for data processing instructions.
+;; 4. MAC pipeline for multiply as well as multiply
+;; and accumulate instructions.
+;; 5. 1 VFP / Neon pipeline.
+;; The Load/Store and VFP/Neon pipelines are multiplexed.
+;; The P0 / main pipeline and M1 stage of the MAC pipeline are
+;; multiplexed.
+;; The P1 / dual pipeline and M2 stage of the MAC pipeline are
+;; multiplexed.
+;; There are only 4 register read ports, so at any point in time
+;; we can't issue down both the E1 and E2 ports unless, of course,
+;; bypass paths get exercised.
+;; Both P0 and P1 have 2 stages E1 and E2.
+;; Data processing instructions issue to E1 or E2 depending on
+;; whether they have an early shift or not.
+
+
+(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9")
+(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
+(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
+(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
+(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9")
+(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9")
+
+(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb")
+(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default")
+(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default")
+
+(define_reservation "cortex_a9_multcycle1"
+ "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \
+cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
+
+(define_reservation "cortex_a9_mult16"
+ "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mac16"
+ "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mult"
+ "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mac"
+ "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")
+
+
+;; Simultaneous issue down the load store pipeline and
+;; the VFP / Neon pipeline is not possible.
+;; FIXME: at some point we need to model the load store and
+;; the VFP / Neon pipelines as sharing an issue slot rather than
+;; as mutually exclusive.
+
+(exclusion_set "cortex_a9_ls" "cortex_a9_vfp")
+
+
+;; Default data processing instruction without any shift.
+;; The only exception to this is the mov instruction,
+;; which can go down E2 without any problem.
+(define_insn_reservation "cortex_a9_dp" 2
+ (and (eq_attr "tune" "cortexa9")
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (eq_attr "insn" "mov"))))
+ "cortex_a9_p0_default|cortex_a9_p1_default")
+
+;; An instruction using the shifter will go down E1.
+(define_insn_reservation "cortex_a9_dp_shift" 3
+ (and (eq_attr "tune" "cortexa9")
+ (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (not (eq_attr "insn" "mov"))))
+ "cortex_a9_p0_shift | cortex_a9_p1_shift")
+
+;; Loads have a latency of 4 cycles.
+;; We don't model autoincrement instructions. These
+;; instructions use the load store pipeline and 1 of
+;; the E2 units to write back the result of the increment.
+
+(define_insn_reservation "cortex_a9_load1_2" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "load1, load2, load_byte"))
+ "cortex_a9_ls")
+
+;; Load multiples and store multiples can't be issued for 2 cycles in a
+;; row. The description below assumes that addresses are 64-bit aligned.
+;; If not, there is an extra cycle latency which is not modelled.
+
+;; FIXME: this bit might need to be reworked when we get to
+;; tuning for the VFP, because strictly speaking the ldm
+;; is sent to the LSU as is, and there is only an
+;; issue restriction between the LSU and the VFP / Neon unit.
+
+(define_insn_reservation "cortex_a9_load3_4" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "load3, load4"))
+ "cortex_a9_ls, cortex_a9_ls")
+
+(define_insn_reservation "cortex_a9_store1_2" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "store1, store2"))
+ "cortex_a9_ls")
+
+;; Almost all our store multiples use an auto-increment
+;; form. Don't issue back-to-back load and store multiples
+;; because the load store unit will stall.
+(define_insn_reservation "cortex_a9_store3_4" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "store3, store4"))
+ "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls")
+
+;; We get 16*16 multiply / mac results in 3 cycles.
+(define_insn_reservation "cortex_a9_mult16" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smulxy"))
+ "cortex_a9_mult16")
+
+;; The 16*16 mac is slightly different in that it
+;; reserves M1 and M2 in the same cycle.
+(define_insn_reservation "cortex_a9_mac16" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smlaxy"))
+ "cortex_a9_mac16")
+
+
+(define_insn_reservation "cortex_a9_multiply" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "mul"))
+ "cortex_a9_mult")
+
+(define_insn_reservation "cortex_a9_mac" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "mla"))
+ "cortex_a9_mac")
+
+;; An instruction with a result in E2 can be forwarded
+;; to E2 or E1 or M1 or the load store unit in the next cycle.
+
+(define_bypass 1 "cortex_a9_dp"
+ "cortex_a9_dp_shift, cortex_a9_multiply,
+ cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+
+(define_bypass 2 "cortex_a9_dp_shift"
+ "cortex_a9_dp_shift, cortex_a9_multiply,
+ cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+
+;; An instruction in the load store pipeline can provide
+;; read access to a DP instruction in the P0 default pipeline
+;; before the writeback stage.
+
+(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2,
+cortex_a9_store3_4, cortex_a9_store1_2")
+
+(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2,
+cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4")
+
+;; Calls and branches.
+
+;; Branch instructions
+
+(define_insn_reservation "cortex_a9_branch" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "branch"))
+ "cortex_a9_branch")
+
+;; Call latencies are essentially 0, but make sure
+;; dual issue doesn't happen, i.e. the next instruction
+;; starts at the next cycle.
+(define_insn_reservation "cortex_a9_call" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "call"))
+ "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp")
+
+
+;; Pipelining for VFP instructions.
(define_insn_reservation "cortex_a9_ffarith" 1
(and (eq_attr "tune" "cortexa9")
diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md
index fcd92b002d7..515de43d28b 100644
--- a/gcc/config/arm/fpa.md
+++ b/gcc/config/arm/fpa.md
@@ -599,10 +599,10 @@
{
default:
case 0: return \"mvf%?e\\t%0, %1\";
- case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 1: if (TARGET_FPA_EMU2)
return \"ldf%?e\\t%0, %1\";
return \"lfm%?\\t%0, 1, %1\";
- case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 2: if (TARGET_FPA_EMU2)
return \"stf%?e\\t%1, %0\";
return \"sfm%?\\t%1, 1, %0\";
}
diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
index 780a504add2..fce1ed165d3 100644
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -66,7 +66,7 @@
/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
use the GNU/Linux version, not the generic BPABI version. */
#undef LINK_SPEC
-#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC BE8_LINK_SPEC
/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
do not use -lfloat. */
diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h
index 07455ee87fd..9fdca414e8e 100644
--- a/gcc/config/arm/linux-elf.h
+++ b/gcc/config/arm/linux-elf.h
@@ -98,7 +98,7 @@
/* NWFPE always understands FPA instructions. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3
+#define FPUTYPE_DEFAULT "fpe3"
/* Call the function profiler with a given profile label. */
#undef ARM_FUNCTION_PROFILER
diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml
index 9c8e2a89b86..112c8be6e3b 100644
--- a/gcc/config/arm/neon-gen.ml
+++ b/gcc/config/arm/neon-gen.ml
@@ -122,6 +122,7 @@ let rec signed_ctype = function
| T_uint16 | T_int16 -> T_intHI
| T_uint32 | T_int32 -> T_intSI
| T_uint64 | T_int64 -> T_intDI
+ | T_float32 -> T_floatSF
| T_poly8 -> T_intQI
| T_poly16 -> T_intHI
| T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
@@ -320,7 +321,7 @@ let deftypes () =
typeinfo;
Format.print_newline ();
(* Extra types not in <stdint.h>. *)
- Format.printf "typedef __builtin_neon_sf float32_t;\n";
+ Format.printf "typedef float float32_t;\n";
Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 85bc3eed100..7d1ef111339 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3655,7 +3655,8 @@
UNSPEC_VSHLL_N))]
"TARGET_NEON"
{
- neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
+ /* The boundaries are: 0 < imm <= size. */
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
}
[(set_attr "neon_type" "neon_shift_1")]
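An illustrative consequence of the relaxed bound (a sketch, not part of the patch; assumes an ARM target with NEON enabled): a shift amount equal to the element width, previously rejected, is now accepted, matching the architectural range 0 < imm <= size for VSHLL:

    #include <arm_neon.h>

    int16x8_t
    widen_by_8 (int8x8_t v)
    {
      return vshll_n_s8 (v, 8);   /* imm == element size (8) is now within bounds */
    }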
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 10393b33ebc..114097d22a7 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -50,7 +50,7 @@ type vectype = T_int8x8 | T_int8x16
| T_ptrto of vectype | T_const of vectype
| T_void | T_intQI
| T_intHI | T_intSI
- | T_intDI
+ | T_intDI | T_floatSF
(* The meanings of the following are:
TImode : "Tetra", two registers (four words).
@@ -1693,6 +1693,7 @@ let string_of_vectype vt =
| T_intHI -> "__builtin_neon_hi"
| T_intSI -> "__builtin_neon_si"
| T_intDI -> "__builtin_neon_di"
+ | T_floatSF -> "__builtin_neon_sf"
| T_arrayof (num, base) ->
let basename = name (fun x -> x) base in
affix (Printf.sprintf "%sx%d" basename num)
diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h
index 4c06fa1cb3b..9cf186b338d 100644
--- a/gcc/config/arm/netbsd-elf.h
+++ b/gcc/config/arm/netbsd-elf.h
@@ -153,5 +153,5 @@ do \
while (0)
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 884d58c7677..82f75f9b733 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1156,8 +1156,8 @@
;; 16-bit load immediate
(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (match_operand:SI 1 "const_int_operand" ""))]
+ [(set (match_operand:QHSI 0 "low_register_operand" "")
+ (match_operand:QHSI 1 "const_int_operand" ""))]
"TARGET_THUMB2
&& peep2_regno_dead_p(0, CC_REGNUM)
&& (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256"
@@ -1168,9 +1168,9 @@
""
)
-(define_insn "*thumb2_movsi_shortim"
- [(set (match_operand:SI 0 "low_register_operand" "=l")
- (match_operand:SI 1 "const_int_operand" "I"))
+(define_insn "*thumb2_mov<mode>_shortim"
+ [(set (match_operand:QHSI 0 "low_register_operand" "=l")
+ (match_operand:QHSI 1 "const_int_operand" "I"))
(clobber (reg:CC CC_REGNUM))]
"TARGET_THUMB2 && reload_completed"
"mov%!\t%0, %1"
diff --git a/gcc/config/arm/unwind-arm.c b/gcc/config/arm/unwind-arm.c
index 4eb18215f17..2c6e004890e 100644
--- a/gcc/config/arm/unwind-arm.c
+++ b/gcc/config/arm/unwind-arm.c
@@ -1000,7 +1000,6 @@ __gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument,
while (code != _URC_END_OF_STACK
&& code != _URC_FAILURE);
- finish:
restore_non_core_regs (&saved_vrs);
return code;
}
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index 8879fedb7d7..aa7e197bc5d 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -97,7 +97,7 @@ along with GCC; see the file COPYING3. If not see
/* There is no default multilib. */
#undef MULTILIB_DEFAULTS
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
#undef FUNCTION_PROFILER
#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 76df476f4c4..cb2d709c89a 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -104,9 +104,6 @@ static GTY(()) rtx zero_reg_rtx;
/* AVR register names {"r0", "r1", ..., "r31"} */
static const char *const avr_regnames[] = REGISTER_NAMES;
-/* This holds the last insn address. */
-static int last_insn_address = 0;
-
/* Preprocessor macros to define depending on MCU type. */
static const char *avr_extra_arch_macro;
@@ -556,8 +553,6 @@ expand_prologue (void)
rtx pushword = gen_rtx_MEM (HImode,
gen_rtx_POST_DEC (HImode, stack_pointer_rtx));
rtx insn;
-
- last_insn_address = 0;
/* Init cfun->machine. */
cfun->machine->is_naked = avr_naked_function_p (current_function_decl);
@@ -1459,25 +1454,17 @@ byte_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
&& INTVAL (op) <= 0xff && INTVAL (op) >= 0);
}
-/* Output all insn addresses and their sizes into the assembly language
- output file. This is helpful for debugging whether the length attributes
- in the md file are correct.
- Output insn cost for next insn. */
+/* Output insn cost for next insn. */
void
final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
int num_operands ATTRIBUTE_UNUSED)
{
- int uid = INSN_UID (insn);
-
- if (TARGET_INSN_SIZE_DUMP || TARGET_ALL_DEBUG)
+ if (TARGET_ALL_DEBUG)
{
- fprintf (asm_out_file, "/*DEBUG: 0x%x\t\t%d\t%d */\n",
- INSN_ADDRESSES (uid),
- INSN_ADDRESSES (uid) - last_insn_address,
+ fprintf (asm_out_file, "/* DEBUG: cost = %d. */\n",
rtx_cost (PATTERN (insn), INSN, !optimize_size));
}
- last_insn_address = INSN_ADDRESSES (uid);
}
/* Return 0 if undefined, 1 if always true or always false. */
@@ -5890,12 +5877,12 @@ avr_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
return 1;
}
-/* Output a branch that tests a single bit of a register (QI, HI or SImode)
+/* Output a branch that tests a single bit of a register (QI, HI, SI or DImode)
or memory location in the I/O space (QImode only).
Operand 0: comparison operator (must be EQ or NE, compare bit to zero).
Operand 1: register operand to test, or CONST_INT memory address.
- Operand 2: bit number (for QImode operand) or mask (HImode, SImode).
+ Operand 2: bit number.
Operand 3: label to jump to if the test is true. */
const char *
@@ -5943,9 +5930,7 @@ avr_out_sbxx_branch (rtx insn, rtx operands[])
else /* HImode or SImode */
{
static char buf[] = "sbrc %A1,0";
- int bit_nr = exact_log2 (INTVAL (operands[2])
- & GET_MODE_MASK (GET_MODE (operands[1])));
-
+ int bit_nr = INTVAL (operands[2]);
buf[3] = (comp == EQ) ? 's' : 'c';
buf[6] = 'A' + (bit_nr >> 3);
buf[9] = '0' + (bit_nr & 7);
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 0927e3928c1..782ad11627b 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -406,8 +406,6 @@ extern int avr_reg_order[];
#define HAVE_POST_INCREMENT 1
#define HAVE_PRE_DECREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#define REG_OK_FOR_BASE_NOSTRICT_P(X) \
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 5a15200ffe3..51fc1f99b8b 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -118,6 +118,7 @@
;; Define mode iterator
(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
+(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
;;========================================================================
;; The following is used by nonlocal_goto and setjmp.
@@ -235,7 +236,7 @@
(define_insn "*movqi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,d,Qm,r,q,r,*r")
- (match_operand:QI 1 "general_operand" "r,i,rL,Qm,r,q,i"))]
+ (match_operand:QI 1 "general_operand" "rL,i,rL,Qm,r,q,i"))]
"(register_operand (operands[0],QImode)
|| register_operand (operands[1], QImode) || const0_rtx == operands[1])"
"* return output_movqi (insn, operands, NULL);"
@@ -336,7 +337,7 @@
(define_insn "*movhi"
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,d,*r,q,r")
- (match_operand:HI 1 "general_operand" "r,m,rL,i,i,r,q"))]
+ (match_operand:HI 1 "general_operand" "rL,m,rL,i,i,r,q"))]
"(register_operand (operands[0],HImode)
|| register_operand (operands[1],HImode) || const0_rtx == operands[1])"
"* return output_movhi (insn, operands, NULL);"
@@ -2448,12 +2449,15 @@
;; Test a single bit in a QI/HI/SImode register.
-(define_insn "*sbrx_branch"
+;; Combine will create zero-extract patterns for single-bit tests.
+;; Permit any mode in the source pattern by using VOIDmode.
+
+(define_insn "*sbrx_branch<mode>"
[(set (pc)
(if_then_else
(match_operator 0 "eqne_operator"
- [(zero_extract:HI
- (match_operand:QI 1 "register_operand" "r")
+ [(zero_extract:QIDI
+ (match_operand:VOID 1 "register_operand" "r")
(const_int 1)
(match_operand 2 "const_int_operand" "n"))
(const_int 0)])
@@ -2470,39 +2474,27 @@
(const_int 4))))
(set_attr "cc" "clobber")])
-(define_insn "*sbrx_and_branchhi"
- [(set (pc)
- (if_then_else
- (match_operator 0 "eqne_operator"
- [(and:HI
- (match_operand:HI 1 "register_operand" "r")
- (match_operand:HI 2 "single_one_operand" "n"))
- (const_int 0)])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
- ""
- "* return avr_out_sbxx_branch (insn, operands);"
- [(set (attr "length")
- (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
- (le (minus (pc) (match_dup 3)) (const_int 2046)))
- (const_int 2)
- (if_then_else (eq_attr "mcu_mega" "no")
- (const_int 2)
- (const_int 4))))
- (set_attr "cc" "clobber")])
+;; Same test based on bitwise AND RTL. Keep this in case gcc changes
+;; patterns, or for old peepholes.
+;; FIXME: the bitwise mask will not work for DImode.
-(define_insn "*sbrx_and_branchsi"
+(define_insn "*sbrx_and_branch<mode>"
[(set (pc)
(if_then_else
(match_operator 0 "eqne_operator"
- [(and:SI
- (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "single_one_operand" "n"))
+ [(and:QISI
+ (match_operand:QISI 1 "register_operand" "r")
+ (match_operand:QISI 2 "single_one_operand" "n"))
(const_int 0)])
(label_ref (match_operand 3 "" ""))
(pc)))]
""
- "* return avr_out_sbxx_branch (insn, operands);"
+{
+ HOST_WIDE_INT bitnumber;
+ bitnumber = exact_log2 (GET_MODE_MASK (<MODE>mode) & INTVAL (operands[2]));
+ operands[2] = GEN_INT (bitnumber);
+ return avr_out_sbxx_branch (insn, operands);
+}
[(set (attr "length")
(if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
(le (minus (pc) (match_dup 3)) (const_int 2046)))
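A sketch of the kind of source the widened *sbrx_branch pattern targets (illustrative only; do_something and flags are hypothetical names): combine produces a zero-extract for the single-bit test, and the backend branches on one bit of the right byte with sbrs/sbrc:

    extern void do_something (void);      /* hypothetical helper */
    extern volatile unsigned long flags;  /* hypothetical 32-bit operand */

    void
    check_bit_17 (void)
    {
      if (flags & (1UL << 17))   /* single-bit test on a wide operand */
        do_something ();
    }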
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index f94d6a3c2ac..f8013e53a18 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -47,10 +47,6 @@ mshort-calls
Target Report Mask(SHORT_CALLS)
Use rjmp/rcall (limited range) on >8K devices
-msize
-Target Report Mask(INSN_SIZE_DUMP)
-Output instruction sizes to the asm file
-
mtiny-stack
Target Report Mask(TINY_STACK)
Change only the low 8 bits of the stack pointer
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 03a279036f3..365680ee9fa 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -911,9 +911,6 @@ typedef struct {
/* Addressing Modes */
-/* Recognize any constant value that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X))
-
/* Nonzero if the constant value X is a legitimate general operand.
symbol_ref are not legitimate and will be put into constant pool.
See force_const_mem().
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
index db6aa9fe9ce..721c90ff887 100644
--- a/gcc/config/cris/cris-protos.h
+++ b/gcc/config/cris/cris-protos.h
@@ -71,3 +71,5 @@ extern void cris_override_options (void);
extern int cris_initial_elimination_offset (int, int);
extern void cris_init_expanders (void);
+
+extern bool cris_function_value_regno_p (const unsigned int);
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index bf00a57a3b6..225ad403dda 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -130,6 +130,9 @@ static bool cris_frame_pointer_required (void);
static void cris_asm_trampoline_template (FILE *);
static void cris_trampoline_init (rtx, tree, rtx);
+static rtx cris_function_value (const_tree, const_tree, bool);
+static rtx cris_libcall_value (enum machine_mode, const_rtx);
+
/* This is the parsed result of the "-max-stack-stackframe=" option. If
it (still) is zero, then there was no such option given. */
int cris_max_stackframe = 0;
@@ -197,6 +200,11 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT cris_trampoline_init
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE cris_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE cris_libcall_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Helper for cris_load_multiple_op and cris_ret_movem_op. */
@@ -3777,13 +3785,42 @@ cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
int for_return)
{
/* Defining PROMOTE_FUNCTION_RETURN in gcc-2.7.2 uncovered bug 981110 (even
- when modifying FUNCTION_VALUE to return the promoted mode). Maybe
- pointless as of now, but let's keep the old behavior. */
+ when modifying TARGET_FUNCTION_VALUE to return the promoted mode).
+ Maybe pointless as of now, but let's keep the old behavior. */
if (for_return == 1)
return mode;
return CRIS_PROMOTED_MODE (mode, *punsignedp, type);
}
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+static rtx
+cris_function_value (const_tree type,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (type), CRIS_FIRST_ARG_REG);
+}
+
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+static rtx
+cris_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG);
+}
+
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+bool
+cris_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == CRIS_FIRST_ARG_REG);
+}
static int
cris_arg_partial_bytes (CUMULATIVE_ARGS *ca, enum machine_mode mode,
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 586f7ff2077..3c426b74ae5 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -630,12 +630,17 @@ enum reg_class
? GENERAL_REGS : (CLASS))
/* We can't move special registers to and from memory in smaller than
- word_mode. */
-#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
- (((CLASS) != SPECIAL_REGS && (CLASS) != MOF_REGS) \
- || GET_MODE_SIZE (MODE) == 4 \
- || !MEM_P (X) \
- ? NO_REGS : GENERAL_REGS)
+ word_mode. We also can't move between special registers. Luckily,
+ -1, as returned by true_regnum for non-sub/registers, is valid as a
+ parameter to our REGNO_REG_CLASS, returning GENERAL_REGS, so we get
+ the effect that any X that isn't a special-register is treated as
+ a non-empty intersection with GENERAL_REGS. */
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ ((((CLASS) == SPECIAL_REGS || (CLASS) == MOF_REGS) \
+ && ((GET_MODE_SIZE (MODE) < 4 && MEM_P (X)) \
+ || !reg_classes_intersect_p (REGNO_REG_CLASS (true_regnum (X)), \
+ GENERAL_REGS))) \
+ ? GENERAL_REGS : NO_REGS)
/* FIXME: Fix regrename.c; it should check validity of replacements,
not just with a silly pass-specific macro. We may miss some
@@ -901,14 +906,8 @@ struct cum_args {int regs;};
/* Node: Scalar Return */
-/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
- time being. */
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- gen_rtx_REG (TYPE_MODE (VALTYPE), CRIS_FIRST_ARG_REG)
+#define FUNCTION_VALUE_REGNO_P(N) cris_function_value_regno_p (N)
-#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, CRIS_FIRST_ARG_REG)
-
-#define FUNCTION_VALUE_REGNO_P(N) ((N) == CRIS_FIRST_ARG_REG)
/* Node: Aggregate Return */
@@ -951,8 +950,6 @@ struct cum_args {int regs;};
#define HAVE_POST_INCREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Must be a compile-time constant, so we go with the highest value
among all CRIS variants. */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 79eb8da3b0d..bd14a16337e 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -4936,7 +4936,7 @@
;; It should be:
;; movu.b some_byte,reg_32
;; and.b const,reg_32
-;; but is turns into:
+;; but it turns into:
;; move.b some_byte,reg_32
;; and.d const,reg_32
;; Fix it here.
@@ -4953,7 +4953,9 @@
"REGNO (operands[2]) == REGNO (operands[0])
&& INTVAL (operands[3]) <= 65535 && INTVAL (operands[3]) >= 0
&& !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'I')
- && !side_effects_p (operands[1])"
+ && !side_effects_p (operands[1])
+ && (!REG_P (operands[1])
+ || REGNO (operands[1]) <= CRIS_LAST_GENERAL_REGISTER)"
;; FIXME: CC0 valid except for M (i.e. CC_NOT_NEGATIVE).
[(set (match_dup 0) (match_dup 4))
(set (match_dup 5) (match_dup 6))]
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index a8933e66348..b5c4fb8e0d9 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -1697,6 +1697,17 @@ darwin_override_options (void)
if (dwarf_strict < 0)
dwarf_strict = 1;
+ /* Disable -freorder-blocks-and-partition for darwin_emit_unwind_label. */
+ if (flag_reorder_blocks_and_partition
+ && (targetm.asm_out.unwind_label == darwin_emit_unwind_label))
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not work with exceptions "
+ "on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
if (flag_mkernel || flag_apple_kext)
{
/* -mkernel implies -fapple-kext for C++ */
diff --git a/gcc/config/darwin10.h b/gcc/config/darwin10.h
index 65ba2632a8f..b1edf36ce3d 100644
--- a/gcc/config/darwin10.h
+++ b/gcc/config/darwin10.h
@@ -23,3 +23,8 @@ unwinder in libSystem is fixed to digest new epilog unwinding notes. */
#undef LIB_SPEC
#define LIB_SPEC "%{!static:-no_compact_unwind -lSystem}"
+
+/* Unwind labels are no longer required in darwin10. */
+
+#undef TARGET_ASM_EMIT_UNWIND_LABEL
+#define TARGET_ASM_EMIT_UNWIND_LABEL default_emit_unwind_label
diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h
index 20e157173d8..5e6237895b5 100644
--- a/gcc/config/fr30/fr30.h
+++ b/gcc/config/fr30/fr30.h
@@ -741,16 +741,6 @@ enum reg_class
/*}}}*/
/*{{{ Addressing Modes. */
-/* A C expression that is 1 if the RTX X is a constant which is a valid
- address. On most machines, this can be defined as `CONSTANT_P (X)', but a
- few machines are more restrictive in which constant addresses are supported.
-
- `CONSTANT_P' accepts integer-values expressions whose values are not
- explicitly known, such as `symbol_ref', `label_ref', and `high' expressions
- and `const' arithmetic expressions, in addition to `const_int' and
- `const_double' expressions. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* A number, the maximum number of registers that can appear in a valid memory
address. Note that it is up to you to specify a value equal to the maximum
number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */
diff --git a/gcc/config/frv/frv-protos.h b/gcc/config/frv/frv-protos.h
index 2bfdc65f654..3c9950d740b 100644
--- a/gcc/config/frv/frv-protos.h
+++ b/gcc/config/frv/frv-protos.h
@@ -62,6 +62,7 @@ extern rtx frv_function_arg (CUMULATIVE_ARGS *,
extern void frv_function_arg_advance (CUMULATIVE_ARGS *,
enum machine_mode,
tree, int);
+extern bool frv_function_value_regno_p (const unsigned int);
#endif /* TREE_CODE */
extern int frv_expand_block_move (rtx *);
diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c
index d8901e9a68e..a757472f37b 100644
--- a/gcc/config/frv/frv.c
+++ b/gcc/config/frv/frv.c
@@ -273,6 +273,10 @@ static void frv_print_operand_memory_reference_reg
static void frv_print_operand_memory_reference (FILE *, rtx, int);
static int frv_print_operand_jump_hint (rtx);
static const char *comparison_string (enum rtx_code, rtx);
+static rtx frv_function_value (const_tree, const_tree,
+ bool);
+static rtx frv_libcall_value (enum machine_mode,
+ const_rtx);
static FRV_INLINE int frv_regno_ok_for_base_p (int, int);
static rtx single_set_pattern (rtx);
static int frv_function_contains_far_jump (void);
@@ -483,6 +487,11 @@ static void frv_trampoline_init (rtx, tree, rtx);
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT frv_trampoline_init
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE frv_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE frv_libcall_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
#define FRV_SYMBOL_REF_TLS_P(RTX) \
@@ -3291,6 +3300,35 @@ frv_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
}
+/* Implements TARGET_FUNCTION_VALUE. */
+
+static rtx
+frv_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM);
+}
+
+
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+frv_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+
+/* Implements FUNCTION_VALUE_REGNO_P. */
+
+bool
+frv_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == RETURN_VALUE_REGNUM);
+}
+
/* Return true if a register is ok to use as a base or index register. */
static FRV_INLINE int
diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h
index 53966de50b7..d5a7a4a6670 100644
--- a/gcc/config/frv/frv.h
+++ b/gcc/config/frv/frv.h
@@ -1746,48 +1746,7 @@ typedef struct frv_stack {
function call. */
#define RETURN_VALUE_REGNUM (GPR_FIRST + 8)
-/* A C expression to create an RTX representing the place where a function
- returns a value of data type VALTYPE. VALTYPE is a tree node representing a
- data type. Write `TYPE_MODE (VALTYPE)' to get the machine mode used to
- represent that type. On many machines, only the mode is relevant.
- (Actually, on most machines, scalar values are returned in the same place
- regardless of mode).
-
- If the precise function being called is known, FUNC is a tree node
- (`FUNCTION_DECL') for it; otherwise, FUNC is a null pointer. This makes it
- possible to use a different value-returning convention for specific
- functions when all their calls are known.
-
- `FUNCTION_VALUE' is not used for return vales with aggregate data types,
- because these are returned in another way. See
- `TARGET_STRUCT_VALUE_RTX' and related macros, below. */
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- gen_rtx_REG (TYPE_MODE (VALTYPE), RETURN_VALUE_REGNUM)
-
-/* A C expression to create an RTX representing the place where a library
- function returns a value of mode MODE.
-
- Note that "library function" in this context means a compiler support
- routine, used to perform arithmetic, whose name is known specially by the
- compiler and was not mentioned in the C code being compiled.
-
- The definition of `LIBRARY_VALUE' need not be concerned aggregate data
- types, because none of the library functions returns such types. */
-#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, RETURN_VALUE_REGNUM)
-
-/* A C expression that is nonzero if REGNO is the number of a hard register in
- which the values of called function may come back.
-
- A register whose use for returning values is limited to serving as the
- second of a pair (for a value of type `double', say) need not be recognized
- by this macro. So for most machines, this definition suffices:
-
- #define FUNCTION_VALUE_REGNO_P(N) ((N) == RETURN)
-
- If the machine has register windows, so that the caller and the called
- function use different registers for the return value, this macro should
- recognize only the caller's register numbers. */
-#define FUNCTION_VALUE_REGNO_P(REGNO) ((REGNO) == RETURN_VALUE_REGNUM)
+#define FUNCTION_VALUE_REGNO_P(REGNO) frv_function_value_regno_p (REGNO)
/* How Large Values are Returned. */
@@ -1968,16 +1927,6 @@ __asm__("\n" \
/* Addressing Modes. */
-/* A C expression that is 1 if the RTX X is a constant which is a valid
- address. On most machines, this can be defined as `CONSTANT_P (X)', but a
- few machines are more restrictive in which constant addresses are supported.
-
- `CONSTANT_P' accepts integer-values expressions whose values are not
- explicitly known, such as `symbol_ref', `label_ref', and `high' expressions
- and `const' arithmetic expressions, in addition to `const_int' and
- `const_double' expressions. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* A number, the maximum number of registers that can appear in a valid memory
address. Note that it is up to you to specify a value equal to the maximum
number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. */
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index 404595405f3..7bb1e7a6c5c 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -507,6 +507,32 @@ byte_reg (rtx x, int b)
&& call_used_regs[regno] \
&& !current_function_is_leaf)))
+/* We use this to wrap all emitted insns in the prologue, marking
+   them as frame-related.  */
+static rtx
+F (rtx x)
+{
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Mark all the subexpressions of the PARALLEL rtx PAR as
+ frame-related. Return PAR.
+
+ dwarf2out.c:dwarf2out_frame_debug_expr ignores sub-expressions of a
+ PARALLEL rtx other than the first if they do not have the
+ FRAME_RELATED flag set on them. */
+static rtx
+Fpa (rtx par)
+{
+ int len = XVECLEN (par, 0);
+ int i;
+
+ for (i = 0; i < len; i++)
+ F (XVECEXP (par, 0, i));
+
+ return par;
+}
+
/* Output assembly language to FILE for the operation OP with operand size
SIZE to adjust the stack pointer. */
@@ -526,22 +552,27 @@ h8300_emit_stack_adjustment (int sign, HOST_WIDE_INT size)
&& !(cfun->static_chain_decl != NULL && sign < 0))
{
rtx r3 = gen_rtx_REG (Pmode, 3);
- emit_insn (gen_movhi (r3, GEN_INT (sign * size)));
- emit_insn (gen_addhi3 (stack_pointer_rtx,
- stack_pointer_rtx, r3));
+ F (emit_insn (gen_movhi (r3, GEN_INT (sign * size))));
+ F (emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, r3)));
}
else
{
/* The stack adjustment made here is further optimized by the
splitter. In case of H8/300, the splitter always splits the
- addition emitted here to make the adjustment
- interrupt-safe. */
+ addition emitted here to make the adjustment interrupt-safe.
+ FIXME: We don't always tag those, because we don't know what
+ the splitter will do. */
if (Pmode == HImode)
- emit_insn (gen_addhi3 (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (sign * size)));
+ {
+ rtx x = emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (sign * size)));
+ if (size < 4)
+ F (x);
+ }
else
- emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx, GEN_INT (sign * size)));
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (sign * size))));
}
}
@@ -591,7 +622,7 @@ push (int rn)
x = gen_push_h8300hs_advanced (reg);
else
x = gen_push_h8300hs_normal (reg);
- x = emit_insn (x);
+ x = F (emit_insn (x));
REG_NOTES (x) = gen_rtx_EXPR_LIST (REG_INC, stack_pointer_rtx, 0);
}
@@ -634,7 +665,7 @@ h8300_push_pop (int regno, int nregs, int pop_p, int return_p)
{
int i, j;
rtvec vec;
- rtx sp, offset;
+ rtx sp, offset, x;
/* See whether we can use a simple push or pop. */
if (!return_p && nregs == 1)
@@ -685,7 +716,10 @@ h8300_push_pop (int regno, int nregs, int pop_p, int return_p)
RTVEC_ELT (vec, i + j) = gen_rtx_SET (VOIDmode, sp,
gen_rtx_PLUS (Pmode, sp, offset));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+ x = gen_rtx_PARALLEL (VOIDmode, vec);
+ if (!pop_p)
+ x = Fpa (x);
+ emit_insn (x);
}
/* Return true if X has the value sp + OFFSET. */
@@ -820,7 +854,7 @@ h8300_expand_prologue (void)
{
/* Push fp. */
push (HARD_FRAME_POINTER_REGNUM);
- emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ F (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
}
/* Push the rest of the registers in ascending order. */
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index e0f0ea62f4e..9757afc2de3 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -147,6 +147,17 @@ extern const char * const *h8_reg_names;
/* Show we can debug even without a frame pointer. */
/* #define CAN_DEBUG_WITHOUT_FP */
+/* We want dwarf2 info available to gdb... */
+#define DWARF2_DEBUGGING_INFO 1
+/* ... but we don't actually support full dwarf2 EH. */
+#define MUST_USE_SJLJ_EXCEPTIONS 1
+
+/* The return address is pushed on the stack. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_MEM (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM))
+#define INCOMING_FRAME_SP_OFFSET (POINTER_SIZE / 8)
+
+#define DWARF_CIE_DATA_ALIGNMENT 2
+
/* Define this if addresses of constant functions
shouldn't be put through pseudo regs where they can be cse'd.
Desirable on machines where ordinary constants are expensive
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 49acfa780e4..21f0e3184ef 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -46,9 +46,11 @@
/* Extended Features */
/* %ecx */
+#define bit_FMA4 (1 << 16)
#define bit_LAHF_LM (1 << 0)
+#define bit_LWP (1 << 15)
#define bit_SSE4a (1 << 6)
-#define bit_FMA4 (1 << 16)
+#define bit_XOP (1 << 11)
/* %edx */
#define bit_LM (1 << 29)
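A minimal probe for the new bits (a sketch, not part of the patch), using the __get_cpuid helper that <cpuid.h> already provides; the XOP/LWP/FMA4 bits all live in %ecx of extended leaf 0x80000001:

    #include <cpuid.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int eax, ebx, ecx, edx;

      if (__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
        printf ("XOP:%d LWP:%d FMA4:%d\n",
                !!(ecx & bit_XOP), !!(ecx & bit_LWP), !!(ecx & bit_FMA4));
      return 0;
    }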
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 43003cc5cad..cdab21c91a2 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -49,8 +49,9 @@ along with GCC; see the file COPYING3. If not see
target, always use the svr4_dbx_register_map for DWARF .eh_frame
even if we don't use DWARF .debug_frame. */
#undef DWARF_FRAME_REGNUM
-#define DWARF_FRAME_REGNUM(n) TARGET_64BIT \
- ? dbx64_register_map[(n)] : svr4_dbx_register_map[(n)]
+#define DWARF_FRAME_REGNUM(n) \
+ (TARGET_64BIT ? dbx64_register_map[(n)] \
+ : svr4_dbx_register_map[(n)])
#ifdef HAVE_GAS_PE_SECREL32_RELOC
/* Use section relative relocations for debugging offsets. Unlike
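The added parentheses fix a classic macro-precedence hazard. A self-contained illustration (hypothetical macro names, not GCC code):

    #include <stdio.h>

    #define BAD(n)  n > 0 ? 1 : 2      /* shaped like the old definition */
    #define GOOD(n) (n > 0 ? 1 : 2)    /* shaped like the fixed one */

    int
    main (void)
    {
      /* 10 * BAD (5) parses as (10 * 5 > 0) ? 1 : 2, yielding 1;
         10 * GOOD (5) is 10 * 1, yielding 10.  */
      printf ("%d %d\n", 10 * BAD (5), 10 * GOOD (5));
      return 0;
    }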
diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h
index 42782ade0ed..2bd411a0f05 100644
--- a/gcc/config/i386/fma4intrin.h
+++ b/gcc/config/i386/fma4intrin.h
@@ -35,15 +35,6 @@
/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
#include <ammintrin.h>
-/* Internal data types for implementing the intrinsics. */
-typedef float __v8sf __attribute__ ((__vector_size__ (32)));
-typedef double __v4df __attribute__ ((__vector_size__ (32)));
-
-typedef float __m256 __attribute__ ((__vector_size__ (32),
- __may_alias__));
-typedef double __m256d __attribute__ ((__vector_size__ (32),
- __may_alias__));
-
/* 128b Floating point multiply/add type instructions. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 12a3f1759a8..5a5311fba0f 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -232,6 +232,10 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_FMA4)
def_or_undef (parse_in, "__FMA4__");
+ if (isa_flag & OPTION_MASK_ISA_XOP)
+ def_or_undef (parse_in, "__XOP__");
+ if (isa_flag & OPTION_MASK_ISA_LWP)
+ def_or_undef (parse_in, "__LWP__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9df01ba23dc..2031dfb6e98 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1553,6 +1553,11 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
/* X86_ARCH_BSWAP: Byteswap was added for 80486. */
~m_386,
+
+ /* X86_ARCH_CALL_ESP: P6 processors will jump to the address after
+ the decrement when %esp is used as a call operand, so they will
+ execute the return address as code. See Pentium Pro errata 70,
+ Pentium 2 errata A33, Pentium 3 errata E17. */
+ ~(m_386 | m_486 | m_PENT | m_PPRO),
};
static const unsigned int x86_accumulate_outgoing_args
@@ -1905,6 +1910,7 @@ static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
+static unsigned int ix86_minimum_incoming_stack_boundary (bool);
static enum calling_abi ix86_function_abi (const_tree);
@@ -1958,6 +1964,10 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_FMA4_SET \
(OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
| OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_XOP_SET \
+ (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
+#define OPTION_MASK_ISA_LWP_SET \
+ OPTION_MASK_ISA_LWP
/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
@@ -2009,7 +2019,10 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_SSE4A_UNSET \
(OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
-#define OPTION_MASK_ISA_FMA4_UNSET OPTION_MASK_ISA_FMA4
+#define OPTION_MASK_ISA_FMA4_UNSET \
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
+#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
@@ -2257,6 +2270,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
}
return true;
+ case OPT_mxop:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
+ }
+ return true;
+
+ case OPT_mlwp:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
+ }
+ return true;
+
case OPT_mabm:
if (value)
{
@@ -2385,6 +2424,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{
{ "-m64", OPTION_MASK_ISA_64BIT },
{ "-mfma4", OPTION_MASK_ISA_FMA4 },
+ { "-mxop", OPTION_MASK_ISA_XOP },
+ { "-mlwp", OPTION_MASK_ISA_LWP },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -2615,7 +2656,9 @@ override_options (bool main_args_p)
PTA_AVX = 1 << 18,
PTA_FMA = 1 << 19,
PTA_MOVBE = 1 << 20,
- PTA_FMA4 = 1 << 21
+ PTA_FMA4 = 1 << 21,
+ PTA_XOP = 1 << 22,
+ PTA_LWP = 1 << 23
};
static struct pta
@@ -2961,6 +3004,12 @@ override_options (bool main_args_p)
if (processor_alias_table[i].flags & PTA_FMA4
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
+ if (processor_alias_table[i].flags & PTA_XOP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP;
+ if (processor_alias_table[i].flags & PTA_LWP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP;
if (processor_alias_table[i].flags & PTA_ABM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
ix86_isa_flags |= OPTION_MASK_ISA_ABM;
@@ -3239,12 +3288,10 @@ override_options (bool main_args_p)
if (ix86_force_align_arg_pointer == -1)
ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
/* Validate -mincoming-stack-boundary= value or default it to
MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
- if (ix86_force_align_arg_pointer)
- ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
- else
- ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (ix86_incoming_stack_boundary_string)
{
@@ -3394,7 +3441,7 @@ override_options (bool main_args_p)
ix86_gen_pop1 = gen_popdi1;
ix86_gen_add3 = gen_adddi3;
ix86_gen_sub3 = gen_subdi3;
- ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
+ ix86_gen_sub3_carry = gen_subdi3_carry;
ix86_gen_one_cmpl2 = gen_one_cmpldi2;
ix86_gen_monitor = gen_sse3_monitor64;
ix86_gen_andsp = gen_anddi3;
@@ -3645,6 +3692,8 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
IX86_ATTR_ISA ("sse4a", OPT_msse4a),
IX86_ATTR_ISA ("ssse3", OPT_mssse3),
IX86_ATTR_ISA ("fma4", OPT_mfma4),
+ IX86_ATTR_ISA ("xop", OPT_mxop),
+ IX86_ATTR_ISA ("lwp", OPT_mlwp),
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
@@ -4277,7 +4326,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
/* If we need to align the outgoing stack, then sibcalling would
unalign the stack, which may break the called function. */
- if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
+ if (ix86_minimum_incoming_stack_boundary (true)
+ < PREFERRED_STACK_BOUNDARY)
return false;
if (decl)
@@ -4774,6 +4824,25 @@ ix86_function_type_abi (const_tree fntype)
return ix86_abi;
}
+static bool
+ix86_function_ms_hook_prologue (const_tree fntype)
+{
+ if (!TARGET_64BIT)
+ {
+ if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
+ {
+ if (decl_function_context (fntype) != NULL_TREE)
+ {
+ error_at (DECL_SOURCE_LOCATION (fntype),
+ "ms_hook_prologue is not compatible with nested function");
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
+
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
@@ -8177,37 +8246,58 @@ find_drap_reg (void)
}
}
-/* Update incoming stack boundary and estimated stack alignment. */
+/* Return minimum incoming stack alignment. */
-static void
-ix86_update_stack_boundary (void)
+static unsigned int
+ix86_minimum_incoming_stack_boundary (bool sibcall)
{
+ unsigned int incoming_stack_boundary;
+
/* Prefer the one specified at command line. */
- ix86_incoming_stack_boundary
- = (ix86_user_incoming_stack_boundary
- ? ix86_user_incoming_stack_boundary
- : ix86_default_incoming_stack_boundary);
+ if (ix86_user_incoming_stack_boundary)
+ incoming_stack_boundary = ix86_user_incoming_stack_boundary;
+ /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
+ boundary if -mstackrealign is used and the estimated stack
+ alignment is 128 bits. This case is not used for the sibcall
+ check. */
+ else if (!sibcall
+ && !TARGET_64BIT
+ && ix86_force_align_arg_pointer
+ && crtl->stack_alignment_estimated == 128)
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ else
+ incoming_stack_boundary = ix86_default_incoming_stack_boundary;
/* Incoming stack alignment can be changed on individual functions
via force_align_arg_pointer attribute. We use the smallest
incoming stack boundary. */
- if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
+ if (incoming_stack_boundary > MIN_STACK_BOUNDARY
&& lookup_attribute (ix86_force_align_arg_pointer_string,
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
- ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
/* The incoming stack frame has to be aligned at least at
parm_stack_boundary. */
- if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
- ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
+ if (incoming_stack_boundary < crtl->parm_stack_boundary)
+ incoming_stack_boundary = crtl->parm_stack_boundary;
/* Stack at entrance of main is aligned by runtime. We use the
smallest incoming stack boundary. */
- if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
+ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
&& DECL_NAME (current_function_decl)
&& MAIN_NAME_P (DECL_NAME (current_function_decl))
&& DECL_FILE_SCOPE_P (current_function_decl))
- ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+ incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+
+ return incoming_stack_boundary;
+}
+
+/* Update incoming stack boundary and estimated stack alignment. */
+
+static void
+ix86_update_stack_boundary (void)
+{
+ ix86_incoming_stack_boundary
+ = ix86_minimum_incoming_stack_boundary (false);
/* x86_64 vararg needs 16byte stack alignment for register save
area. */
@@ -8295,6 +8385,7 @@ ix86_expand_prologue (void)
bool pic_reg_used;
struct ix86_frame frame;
HOST_WIDE_INT allocate;
+ int gen_frame_pointer = frame_pointer_needed;
ix86_finalize_stack_realign_flags ();
@@ -8307,6 +8398,46 @@ ix86_expand_prologue (void)
ix86_compute_frame_layout (&frame);
+ if (ix86_function_ms_hook_prologue (current_function_decl))
+ {
+ rtx push, mov;
+
+ /* Make sure the function starts with
+ 8b ff movl.s %edi,%edi
+ 55 push %ebp
+ 8b ec movl.s %esp,%ebp
+
+ This matches the hookable function prologue in Win32 API
+ functions in Microsoft Windows XP Service Pack 2 and newer.
+ Wine uses this to enable Windows apps to hook the Win32 API
+ functions provided by Wine. */
+ insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
+ gen_rtx_REG (SImode, DI_REG)));
+ push = emit_insn (gen_push (hard_frame_pointer_rtx));
+ mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+
+ if (frame_pointer_needed && !(crtl->drap_reg
+ && crtl->stack_realign_needed))
+ {
+ /* The push %ebp and movl.s %esp, %ebp already set up
+ the frame pointer. No need to do this again. */
+ gen_frame_pointer = 0;
+ RTX_FRAME_RELATED_P (push) = 1;
+ RTX_FRAME_RELATED_P (mov) = 1;
+ if (ix86_cfa_state->reg == stack_pointer_rtx)
+ ix86_cfa_state->reg = hard_frame_pointer_rtx;
+ }
+ else
+ /* If the frame pointer is not needed, pop %ebp again. This
+ could be optimized for cases where ebp needs to be backed up
+ for some other reason. If stack realignment is needed, pop
+ the base pointer again, align the stack, and later regenerate
+ the frame pointer setup. The frame pointer generated by the
+ hook prologue is not aligned, so it can't be used. */
+ insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+ }
+
/* The first insn of a function that accepts its static chain on the
stack is to push the register that would be filled in by a direct
call. This insn will be skipped by the trampoline. */
@@ -8378,7 +8509,7 @@ ix86_expand_prologue (void)
/* Note: AT&T enter does NOT have reversed args. Enter is probably
slower on all targets. Also sdb doesn't like it. */
- if (frame_pointer_needed)
+ if (gen_frame_pointer)
{
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
@@ -8962,7 +9093,8 @@ ix86_expand_epilogue (int style)
0, red_offset,
style == 2);
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.nsseregs * 16 + frame.padding0),
+ GEN_INT (frame.nsseregs * 16
+ + frame.padding0),
style, false);
}
else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
@@ -10733,7 +10865,7 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
static bool
ix86_pic_register_p (rtx x)
{
- if (GET_CODE (x) == VALUE)
+ if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
return (pic_offset_table_rtx
&& rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
else
@@ -11203,6 +11335,7 @@ get_some_local_dynamic_name (void)
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
+ Y -- print condition for XOP pcom* instruction.
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
*/
@@ -11620,6 +11753,61 @@ print_operand (FILE *file, rtx x, int code)
return;
}
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ fputs ("neq", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case GE:
+ case GEU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
+ break;
+ case GT:
+ case GTU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ case UNEQ:
+ fputs ("ueq", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case UNLE:
+ fputs ("ule", file);
+ break;
+ case UNLT:
+ fputs ("ult", file);
+ break;
+ case LTGT:
+ fputs ("une", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
+ return;
+ }
+ return;
+
case ';':
#if TARGET_MACHO
fputs (" ; ", file);
@@ -12637,7 +12825,7 @@ ix86_expand_clear (rtx dest)
tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
/* This predicate should match that for movsi_xor and movdi_xor_rex64. */
- if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
+ if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
{
rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
@@ -15247,9 +15435,10 @@ ix86_expand_int_movcc (rtx operands[])
tmp = gen_reg_rtx (mode);
if (mode == DImode)
- emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
+ emit_insn (gen_x86_movdicc_0_m1 (tmp, compare_op));
else
- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
+ compare_op));
}
else
{
@@ -15828,6 +16017,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
+ else if (TARGET_XOP)
+ {
+ rtx pcmov = gen_rtx_SET (mode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false));
+ emit_insn (pcmov);
+ }
else
{
op_true = force_reg (mode, op_true);
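For reference, the and/andnot/or select in the generic fallback arm of ix86_expand_sse_movcc is exactly the pattern the new TARGET_XOP arm collapses into a single vpcmov. A GNU C sketch of that select (illustrative, not part of the patch):

    typedef int v4si __attribute__ ((__vector_size__ (16)));

    v4si
    bitwise_select (v4si mask, v4si t, v4si f)
    {
      return (mask & t) | (~mask & f);   /* per-bit select: pcmov semantics */
    }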
@@ -15950,6 +16147,9 @@ ix86_expand_int_vcond (rtx operands[])
cop0 = operands[4];
cop1 = operands[5];
+ /* XOP supports all of the comparisons on all vector int types. */
+ if (!TARGET_XOP)
+ {
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
@@ -16060,6 +16260,7 @@ ix86_expand_int_vcond (rtx operands[])
cop0 = x;
cop1 = CONST0_RTX (mode);
}
+ }
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
operands[1+negate], operands[2-negate]);
@@ -16171,6 +16372,7 @@ int
ix86_expand_int_addcc (rtx operands[])
{
enum rtx_code code = GET_CODE (operands[1]);
+ rtx (*insn)(rtx, rtx, rtx, rtx);
rtx compare_op;
rtx val = const0_rtx;
bool fpcmp = false;
@@ -16211,16 +16413,16 @@ ix86_expand_int_addcc (rtx operands[])
switch (GET_MODE (operands[0]))
{
case QImode:
- emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_subqi3_carry;
break;
case HImode:
- emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_subhi3_carry;
break;
case SImode:
- emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_subsi3_carry;
break;
case DImode:
- emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ insn = gen_subdi3_carry;
break;
default:
gcc_unreachable ();
@@ -16231,21 +16433,23 @@ ix86_expand_int_addcc (rtx operands[])
switch (GET_MODE (operands[0]))
{
case QImode:
- emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_addqi3_carry;
break;
case HImode:
- emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_addhi3_carry;
break;
case SImode:
- emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
+ insn = gen_addsi3_carry;
break;
case DImode:
- emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ insn = gen_adddi3_carry;
break;
default:
gcc_unreachable ();
}
}
+ emit_insn (insn (operands[0], operands[2], val, compare_op));
+
return 1; /* DONE */
}
@@ -19977,6 +20181,9 @@ ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
}
#ifdef ENABLE_EXECUTE_STACK
+#ifdef CHECK_EXECUTE_STACK_ENABLED
+ if (CHECK_EXECUTE_STACK_ENABLED)
+#endif
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
@@ -20712,7 +20919,7 @@ enum ix86_builtins
IX86_BUILTIN_CVTUDQ2PS,
- /* FMA4 instructions. */
+ /* FMA4 and XOP instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
IX86_BUILTIN_VFMADDPS,
@@ -20745,6 +20952,164 @@ enum ix86_builtins
IX86_BUILTIN_VFNMADDPD256,
IX86_BUILTIN_VFNMSUBPS256,
IX86_BUILTIN_VFNMSUBPD256,
+
+ IX86_BUILTIN_VPCMOV,
+ IX86_BUILTIN_VPCMOV_V2DI,
+ IX86_BUILTIN_VPCMOV_V4SI,
+ IX86_BUILTIN_VPCMOV_V8HI,
+ IX86_BUILTIN_VPCMOV_V16QI,
+ IX86_BUILTIN_VPCMOV_V4SF,
+ IX86_BUILTIN_VPCMOV_V2DF,
+ IX86_BUILTIN_VPCMOV256,
+ IX86_BUILTIN_VPCMOV_V4DI256,
+ IX86_BUILTIN_VPCMOV_V8SI256,
+ IX86_BUILTIN_VPCMOV_V16HI256,
+ IX86_BUILTIN_VPCMOV_V32QI256,
+ IX86_BUILTIN_VPCMOV_V8SF256,
+ IX86_BUILTIN_VPCMOV_V4DF256,
+
+ IX86_BUILTIN_VPPERM,
+
+ IX86_BUILTIN_VPMACSSWW,
+ IX86_BUILTIN_VPMACSWW,
+ IX86_BUILTIN_VPMACSSWD,
+ IX86_BUILTIN_VPMACSWD,
+ IX86_BUILTIN_VPMACSSDD,
+ IX86_BUILTIN_VPMACSDD,
+ IX86_BUILTIN_VPMACSSDQL,
+ IX86_BUILTIN_VPMACSSDQH,
+ IX86_BUILTIN_VPMACSDQL,
+ IX86_BUILTIN_VPMACSDQH,
+ IX86_BUILTIN_VPMADCSSWD,
+ IX86_BUILTIN_VPMADCSWD,
+
+ IX86_BUILTIN_VPHADDBW,
+ IX86_BUILTIN_VPHADDBD,
+ IX86_BUILTIN_VPHADDBQ,
+ IX86_BUILTIN_VPHADDWD,
+ IX86_BUILTIN_VPHADDWQ,
+ IX86_BUILTIN_VPHADDDQ,
+ IX86_BUILTIN_VPHADDUBW,
+ IX86_BUILTIN_VPHADDUBD,
+ IX86_BUILTIN_VPHADDUBQ,
+ IX86_BUILTIN_VPHADDUWD,
+ IX86_BUILTIN_VPHADDUWQ,
+ IX86_BUILTIN_VPHADDUDQ,
+ IX86_BUILTIN_VPHSUBBW,
+ IX86_BUILTIN_VPHSUBWD,
+ IX86_BUILTIN_VPHSUBDQ,
+
+ IX86_BUILTIN_VPROTB,
+ IX86_BUILTIN_VPROTW,
+ IX86_BUILTIN_VPROTD,
+ IX86_BUILTIN_VPROTQ,
+ IX86_BUILTIN_VPROTB_IMM,
+ IX86_BUILTIN_VPROTW_IMM,
+ IX86_BUILTIN_VPROTD_IMM,
+ IX86_BUILTIN_VPROTQ_IMM,
+
+ IX86_BUILTIN_VPSHLB,
+ IX86_BUILTIN_VPSHLW,
+ IX86_BUILTIN_VPSHLD,
+ IX86_BUILTIN_VPSHLQ,
+ IX86_BUILTIN_VPSHAB,
+ IX86_BUILTIN_VPSHAW,
+ IX86_BUILTIN_VPSHAD,
+ IX86_BUILTIN_VPSHAQ,
+
+ IX86_BUILTIN_VFRCZSS,
+ IX86_BUILTIN_VFRCZSD,
+ IX86_BUILTIN_VFRCZPS,
+ IX86_BUILTIN_VFRCZPD,
+ IX86_BUILTIN_VFRCZPS256,
+ IX86_BUILTIN_VFRCZPD256,
+
+ IX86_BUILTIN_VPCOMEQUB,
+ IX86_BUILTIN_VPCOMNEUB,
+ IX86_BUILTIN_VPCOMLTUB,
+ IX86_BUILTIN_VPCOMLEUB,
+ IX86_BUILTIN_VPCOMGTUB,
+ IX86_BUILTIN_VPCOMGEUB,
+ IX86_BUILTIN_VPCOMFALSEUB,
+ IX86_BUILTIN_VPCOMTRUEUB,
+
+ IX86_BUILTIN_VPCOMEQUW,
+ IX86_BUILTIN_VPCOMNEUW,
+ IX86_BUILTIN_VPCOMLTUW,
+ IX86_BUILTIN_VPCOMLEUW,
+ IX86_BUILTIN_VPCOMGTUW,
+ IX86_BUILTIN_VPCOMGEUW,
+ IX86_BUILTIN_VPCOMFALSEUW,
+ IX86_BUILTIN_VPCOMTRUEUW,
+
+ IX86_BUILTIN_VPCOMEQUD,
+ IX86_BUILTIN_VPCOMNEUD,
+ IX86_BUILTIN_VPCOMLTUD,
+ IX86_BUILTIN_VPCOMLEUD,
+ IX86_BUILTIN_VPCOMGTUD,
+ IX86_BUILTIN_VPCOMGEUD,
+ IX86_BUILTIN_VPCOMFALSEUD,
+ IX86_BUILTIN_VPCOMTRUEUD,
+
+ IX86_BUILTIN_VPCOMEQUQ,
+ IX86_BUILTIN_VPCOMNEUQ,
+ IX86_BUILTIN_VPCOMLTUQ,
+ IX86_BUILTIN_VPCOMLEUQ,
+ IX86_BUILTIN_VPCOMGTUQ,
+ IX86_BUILTIN_VPCOMGEUQ,
+ IX86_BUILTIN_VPCOMFALSEUQ,
+ IX86_BUILTIN_VPCOMTRUEUQ,
+
+ IX86_BUILTIN_VPCOMEQB,
+ IX86_BUILTIN_VPCOMNEB,
+ IX86_BUILTIN_VPCOMLTB,
+ IX86_BUILTIN_VPCOMLEB,
+ IX86_BUILTIN_VPCOMGTB,
+ IX86_BUILTIN_VPCOMGEB,
+ IX86_BUILTIN_VPCOMFALSEB,
+ IX86_BUILTIN_VPCOMTRUEB,
+
+ IX86_BUILTIN_VPCOMEQW,
+ IX86_BUILTIN_VPCOMNEW,
+ IX86_BUILTIN_VPCOMLTW,
+ IX86_BUILTIN_VPCOMLEW,
+ IX86_BUILTIN_VPCOMGTW,
+ IX86_BUILTIN_VPCOMGEW,
+ IX86_BUILTIN_VPCOMFALSEW,
+ IX86_BUILTIN_VPCOMTRUEW,
+
+ IX86_BUILTIN_VPCOMEQD,
+ IX86_BUILTIN_VPCOMNED,
+ IX86_BUILTIN_VPCOMLTD,
+ IX86_BUILTIN_VPCOMLED,
+ IX86_BUILTIN_VPCOMGTD,
+ IX86_BUILTIN_VPCOMGED,
+ IX86_BUILTIN_VPCOMFALSED,
+ IX86_BUILTIN_VPCOMTRUED,
+
+ IX86_BUILTIN_VPCOMEQQ,
+ IX86_BUILTIN_VPCOMNEQ,
+ IX86_BUILTIN_VPCOMLTQ,
+ IX86_BUILTIN_VPCOMLEQ,
+ IX86_BUILTIN_VPCOMGTQ,
+ IX86_BUILTIN_VPCOMGEQ,
+ IX86_BUILTIN_VPCOMFALSEQ,
+ IX86_BUILTIN_VPCOMTRUEQ,
+
+ /* LWP instructions. */
+ IX86_BUILTIN_LLWPCB16,
+ IX86_BUILTIN_LLWPCB32,
+ IX86_BUILTIN_LLWPCB64,
+ IX86_BUILTIN_SLWPCB16,
+ IX86_BUILTIN_SLWPCB32,
+ IX86_BUILTIN_SLWPCB64,
+ IX86_BUILTIN_LWPVAL16,
+ IX86_BUILTIN_LWPVAL32,
+ IX86_BUILTIN_LWPVAL64,
+ IX86_BUILTIN_LWPINS16,
+ IX86_BUILTIN_LWPINS32,
+ IX86_BUILTIN_LWPINS64,
+
IX86_BUILTIN_MAX
};
@@ -20958,7 +21323,13 @@ enum ix86_special_builtin_type
VOID_FTYPE_PV8SF_V8SF_V8SF,
VOID_FTYPE_PV4DF_V4DF_V4DF,
VOID_FTYPE_PV4SF_V4SF_V4SF,
- VOID_FTYPE_PV2DF_V2DF_V2DF
+ VOID_FTYPE_PV2DF_V2DF_V2DF,
+ VOID_FTYPE_USHORT_UINT_USHORT,
+ VOID_FTYPE_UINT_UINT_UINT,
+ VOID_FTYPE_UINT64_UINT_UINT,
+ UCHAR_FTYPE_USHORT_UINT_USHORT,
+ UCHAR_FTYPE_UINT_UINT_UINT,
+ UCHAR_FTYPE_UINT64_UINT_UINT
};
/* Builtin types */
@@ -21205,6 +21576,22 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+
};
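A hedged sketch of how the 32-bit LWP builtins registered above would be used (requires -mlwp; argument meanings are inferred from the type signatures in this table, not from the AMD LWP specification):

    /* lwpval logs a user event record unconditionally; lwpins does the same
       but also reports a status bit, returned here as 0/1.  */
    unsigned char
    log_event (unsigned data2, unsigned data1, unsigned flags)
    {
      __builtin_ia32_lwpval32 (data2, data1, flags);
      return __builtin_ia32_lwpins32 (data2, data1, flags);
    }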
/* Builtins with variable number of arguments. */
@@ -21818,13 +22205,58 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
-/* FMA4. */
+/* FMA4 and XOP. */
enum multi_arg_type {
MULTI_ARG_UNKNOWN,
MULTI_ARG_3_SF,
MULTI_ARG_3_DF,
MULTI_ARG_3_SF2,
- MULTI_ARG_3_DF2
+ MULTI_ARG_3_DF2,
+ MULTI_ARG_3_DI,
+ MULTI_ARG_3_SI,
+ MULTI_ARG_3_SI_DI,
+ MULTI_ARG_3_HI,
+ MULTI_ARG_3_HI_SI,
+ MULTI_ARG_3_QI,
+ MULTI_ARG_3_DI2,
+ MULTI_ARG_3_SI2,
+ MULTI_ARG_3_HI2,
+ MULTI_ARG_3_QI2,
+ MULTI_ARG_2_SF,
+ MULTI_ARG_2_DF,
+ MULTI_ARG_2_DI,
+ MULTI_ARG_2_SI,
+ MULTI_ARG_2_HI,
+ MULTI_ARG_2_QI,
+ MULTI_ARG_2_DI_IMM,
+ MULTI_ARG_2_SI_IMM,
+ MULTI_ARG_2_HI_IMM,
+ MULTI_ARG_2_QI_IMM,
+ MULTI_ARG_2_DI_CMP,
+ MULTI_ARG_2_SI_CMP,
+ MULTI_ARG_2_HI_CMP,
+ MULTI_ARG_2_QI_CMP,
+ MULTI_ARG_2_DI_TF,
+ MULTI_ARG_2_SI_TF,
+ MULTI_ARG_2_HI_TF,
+ MULTI_ARG_2_QI_TF,
+ MULTI_ARG_2_SF_TF,
+ MULTI_ARG_2_DF_TF,
+ MULTI_ARG_1_SF,
+ MULTI_ARG_1_DF,
+ MULTI_ARG_1_SF2,
+ MULTI_ARG_1_DF2,
+ MULTI_ARG_1_DI,
+ MULTI_ARG_1_SI,
+ MULTI_ARG_1_HI,
+ MULTI_ARG_1_QI,
+ MULTI_ARG_1_SI_DI,
+ MULTI_ARG_1_HI_DI,
+ MULTI_ARG_1_HI_SI,
+ MULTI_ARG_1_QI_DI,
+ MULTI_ARG_1_QI_SI,
+ MULTI_ARG_1_QI_HI
+
};
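Decoding the naming scheme (an editorial gloss, cross-checked against the mtype table added later in this patch): the digit is the operand count, the mode letters give the element width, a trailing 2 marks the 256-bit AVX form, and suffixes such as _IMM, _CMP and _TF flag special operand handling. For example:

    /* MULTI_ARG_3_HI      v8hi fn (v8hi, v8hi, v8hi)   three 16-bit vectors
       MULTI_ARG_2_DI_IMM  v2di fn (v2di, int)          rotate by immediate
       MULTI_ARG_1_QI_HI   v8hi fn (v16qi)              widening unary op  */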
static const struct builtin_description bdesc_multi_arg[] =
@@ -21865,7 +22297,160 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
};
@@ -22247,51 +22832,6 @@ ix86_init_mmx_sse_builtins (void)
integer_type_node,
NULL_TREE);
-
- tree v2di_ftype_v2di
- = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
-
- tree v16qi_ftype_v8hi_v8hi
- = build_function_type_list (V16QI_type_node,
- V8HI_type_node, V8HI_type_node,
- NULL_TREE);
- tree v8hi_ftype_v4si_v4si
- = build_function_type_list (V8HI_type_node,
- V4SI_type_node, V4SI_type_node,
- NULL_TREE);
- tree v8hi_ftype_v16qi_v16qi
- = build_function_type_list (V8HI_type_node,
- V16QI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v4hi_ftype_v8qi_v8qi
- = build_function_type_list (V4HI_type_node,
- V8QI_type_node, V8QI_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_uchar
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_char_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_ushort
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- short_unsigned_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_unsigned
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_type_node,
- NULL_TREE);
- tree uint64_ftype_uint64_uint64
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
- tree float_ftype_float
- = build_function_type_list (float_type_node,
- float_type_node,
- NULL_TREE);
-
/* AVX builtins */
tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
V32QImode);
@@ -22303,6 +22843,8 @@ ix86_init_mmx_sse_builtins (void)
V4DImode);
tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
V4DFmode);
+ tree V16HI_type_node = build_vector_type_for_mode (intHI_type_node,
+ V16HImode);
tree v8sf_ftype_v8sf
= build_function_type_list (V8SF_type_node,
V8SF_type_node,
@@ -22547,6 +23089,138 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DI_type_node, NULL_TREE);
+ /* XOP instructions */
+ tree v2di_ftype_v2di_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v4di_ftype_v4di_v4di_v4di
+ = build_function_type_list (V4DI_type_node,
+ V4DI_type_node,
+ V4DI_type_node,
+ V4DI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v8si_ftype_v8si_v8si_v8si
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node,
+ V8SI_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+
+ tree v32qi_ftype_v32qi_v32qi_v32qi
+ = build_function_type_list (V32QI_type_node,
+ V32QI_type_node,
+ V32QI_type_node,
+ V32QI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v2di
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ NULL_TREE);
+
+ tree v16hi_ftype_v16hi_v16hi_v16hi
+ = build_function_type_list (V16HI_type_node,
+ V16HI_type_node,
+ V16HI_type_node,
+ V16HI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v4si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di_si
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v16qi_ftype_v16qi_si
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
+
+ tree v16qi_ftype_v8hi_v8hi
+ = build_function_type_list (V16QI_type_node,
+ V8HI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v4si_v4si
+ = build_function_type_list (V8HI_type_node,
+ V4SI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v16qi_v16qi
+ = build_function_type_list (V8HI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4hi_ftype_v8qi_v8qi
+ = build_function_type_list (V4HI_type_node,
+ V8QI_type_node, V8QI_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_uchar
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_char_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_ushort
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_unsigned
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+ tree uint64_ftype_uint64_uint64
+ = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
+ tree float_ftype_float
+ = build_function_type_list (float_type_node,
+ float_type_node,
+ NULL_TREE);
+
/* Integer intrinsics. */
tree uint64_ftype_void
= build_function_type (long_long_unsigned_type_node,
@@ -22576,6 +23250,50 @@ ix86_init_mmx_sse_builtins (void)
integer_type_node,
NULL_TREE);
+ /* LWP instructions. */
+
+ tree void_ftype_ushort_unsigned_ushort
+ = build_function_type_list (void_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_ushort_unsigned_ushort
+ = build_function_type_list (unsigned_char_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
tree ftype;
/* Add all special builtins with variable number of operands. */
@@ -22689,6 +23407,25 @@ ix86_init_mmx_sse_builtins (void)
case VOID_FTYPE_PV2DF_V2DF_V2DF:
type = void_ftype_pv2df_v2df_v2df;
break;
+ case VOID_FTYPE_USHORT_UINT_USHORT:
+ type = void_ftype_ushort_unsigned_ushort;
+ break;
+ case VOID_FTYPE_UINT_UINT_UINT:
+ type = void_ftype_unsigned_unsigned_unsigned;
+ break;
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ type = void_ftype_uint64_unsigned_unsigned;
+ break;
+ case UCHAR_FTYPE_USHORT_UINT_USHORT:
+ type = uchar_ftype_ushort_unsigned_ushort;
+ break;
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ type = uchar_ftype_unsigned_unsigned_unsigned;
+ break;
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ type = uchar_ftype_uint64_unsigned_unsigned;
+ break;
+
default:
gcc_unreachable ();
}
@@ -23315,6 +24052,50 @@ ix86_init_mmx_sse_builtins (void)
case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
case MULTI_ARG_3_SF2: mtype = v8sf_ftype_v8sf_v8sf_v8sf; break;
case MULTI_ARG_3_DF2: mtype = v4df_ftype_v4df_v4df_v4df; break;
+ case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
+ case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
+ case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
+ case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
+ case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
+ case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
+ case MULTI_ARG_3_DI2: mtype = v4di_ftype_v4di_v4di_v4di; break;
+ case MULTI_ARG_3_SI2: mtype = v8si_ftype_v8si_v8si_v8si; break;
+ case MULTI_ARG_3_HI2: mtype = v16hi_ftype_v16hi_v16hi_v16hi; break;
+ case MULTI_ARG_3_QI2: mtype = v32qi_ftype_v32qi_v32qi_v32qi; break;
+ case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
+ case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
+ case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
+ case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
+ case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
+ case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
+ case MULTI_ARG_1_SF2: mtype = v8sf_ftype_v8sf; break;
+ case MULTI_ARG_1_DF2: mtype = v4df_ftype_v4df; break;
+ case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
+ case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
+ case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
+ case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
+ case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
+ case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
+ case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
+ case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
case MULTI_ARG_UNKNOWN:
default:
@@ -23440,6 +24221,17 @@ ix86_init_builtins (void)
ix86_init_builtins_va_builtins_abi ();
}
+/* Return the ix86 builtin for CODE. */
+
+static tree
+ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= IX86_BUILTIN_MAX)
+ return error_mark_node;
+
+ return ix86_builtins[code];
+}
+
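The new hook gives the middle end a way to map a builtin code back to its decl. A caller-side sketch (assuming the standard targetm hook naming convention; this is not code from the patch):

    tree decl = targetm.builtin_decl (IX86_BUILTIN_VPCMOV, true);
    if (decl == error_mark_node)
      ; /* the code was out of range for this target */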
/* Errors in the source file can cause expand_expr to return const0_rtx
where we expect a vector. To avoid crashing, use one of the vector
clear instructions. */
@@ -23523,9 +24315,71 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
case MULTI_ARG_3_DF:
case MULTI_ARG_3_SF2:
case MULTI_ARG_3_DF2:
+ case MULTI_ARG_3_DI:
+ case MULTI_ARG_3_SI:
+ case MULTI_ARG_3_SI_DI:
+ case MULTI_ARG_3_HI:
+ case MULTI_ARG_3_HI_SI:
+ case MULTI_ARG_3_QI:
+ case MULTI_ARG_3_DI2:
+ case MULTI_ARG_3_SI2:
+ case MULTI_ARG_3_HI2:
+ case MULTI_ARG_3_QI2:
nargs = 3;
break;
+ case MULTI_ARG_2_SF:
+ case MULTI_ARG_2_DF:
+ case MULTI_ARG_2_DI:
+ case MULTI_ARG_2_SI:
+ case MULTI_ARG_2_HI:
+ case MULTI_ARG_2_QI:
+ nargs = 2;
+ break;
+
+ case MULTI_ARG_2_DI_IMM:
+ case MULTI_ARG_2_SI_IMM:
+ case MULTI_ARG_2_HI_IMM:
+ case MULTI_ARG_2_QI_IMM:
+ nargs = 2;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_1_SF:
+ case MULTI_ARG_1_DF:
+ case MULTI_ARG_1_SF2:
+ case MULTI_ARG_1_DF2:
+ case MULTI_ARG_1_DI:
+ case MULTI_ARG_1_SI:
+ case MULTI_ARG_1_HI:
+ case MULTI_ARG_1_QI:
+ case MULTI_ARG_1_SI_DI:
+ case MULTI_ARG_1_HI_DI:
+ case MULTI_ARG_1_HI_SI:
+ case MULTI_ARG_1_QI_DI:
+ case MULTI_ARG_1_QI_SI:
+ case MULTI_ARG_1_QI_HI:
+ nargs = 1;
+ break;
+
+ case MULTI_ARG_2_DI_CMP:
+ case MULTI_ARG_2_SI_CMP:
+ case MULTI_ARG_2_HI_CMP:
+ case MULTI_ARG_2_QI_CMP:
+ nargs = 2;
+ comparison_p = true;
+ break;
+
+ case MULTI_ARG_2_SF_TF:
+ case MULTI_ARG_2_DF_TF:
+ case MULTI_ARG_2_DI_TF:
+ case MULTI_ARG_2_SI_TF:
+ case MULTI_ARG_2_HI_TF:
+ case MULTI_ARG_2_QI_TF:
+ nargs = 2;
+ tf_p = true;
+ break;
+
case MULTI_ARG_UNKNOWN:
default:
gcc_unreachable ();
@@ -24463,6 +25317,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case VOID_FTYPE_USHORT_UINT_USHORT:
+ case VOID_FTYPE_UINT_UINT_UINT:
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ case UCHAR_FTYPE_USHORT_UINT_USHORT:
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ nargs = 3;
+ klass = store;
+ memory = 0;
+ break;
default:
gcc_unreachable ();
}
@@ -25206,7 +26070,7 @@ static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
bool sqrt ATTRIBUTE_UNUSED)
{
- if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations))
return NULL_TREE;
@@ -26455,6 +27319,33 @@ ix86_handle_struct_attribute (tree *node, tree name,
return NULL_TREE;
}
+static tree
+ix86_handle_fndecl_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ if (TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
+ name);
+ return NULL_TREE;
+ }
+
+#ifndef HAVE_AS_IX86_SWAP
+ sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
+#endif
+
+ return NULL_TREE;
+}
+
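What the new handler accepts, in user code (32-bit only, per the checks above; the attribute marks a function for Microsoft-style hot patching):

    __attribute__ ((ms_hook_prologue))
    void patchable_entry (void)
    {
    }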
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
@@ -28520,18 +29411,18 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
+ /* e0 = x0 * a */
emit_insn (gen_rtx_SET (VOIDmode, e0,
- gen_rtx_MULT (mode, x0, b)));
- /* e1 = 2. - e0 */
+ gen_rtx_MULT (mode, x0, a)));
+ /* e1 = x0 * b */
emit_insn (gen_rtx_SET (VOIDmode, e1,
- gen_rtx_MINUS (mode, two, e0)));
- /* x1 = x0 * e1 */
+ gen_rtx_MULT (mode, x0, b)));
+ /* x1 = 2. - e1 */
emit_insn (gen_rtx_SET (VOIDmode, x1,
- gen_rtx_MULT (mode, x0, e1)));
- /* res = a * x1 */
+ gen_rtx_MINUS (mode, two, e1)));
+ /* res = e0 * x1 */
emit_insn (gen_rtx_SET (VOIDmode, res,
- gen_rtx_MULT (mode, a, x1)));
+ gen_rtx_MULT (mode, e0, x1)));
}
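The reordered sequence is still a single Newton-Raphson step for a/b, but the multiply by a is hoisted ahead of the subtract, presumably to shorten the dependency chain. A plain-C restatement of the five emitted operations (x0 stands in for the rcpss estimate of 1/b):

    float
    swdiv (float a, float b)
    {
      float x0 = 1.0f / b;      /* hardware RCP approximation */
      float e0 = x0 * a;
      float e1 = x0 * b;
      float x1 = 2.0f - e1;
      return e0 * x1;           /* = a * x0 * (2 - b*x0) */
    }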
/* Output code to perform a Newton-Raphson approximation of a
@@ -29497,6 +30388,7 @@ static const struct attribute_spec ix86_attribute_table[] =
/* ms_abi and sysv_abi calling convention function attributes. */
{ "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
{ "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
+ { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
/* End element. */
{ NULL, 0, 0, false, false, false, NULL }
};
@@ -29663,6 +30555,8 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8d525727eec..4bc8ef18500 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_FMA4 OPTION_ISA_FMA4
+#define TARGET_XOP OPTION_ISA_XOP
+#define TARGET_LWP OPTION_ISA_LWP
#define TARGET_ROUND OPTION_ISA_ROUND
#define TARGET_ABM OPTION_ISA_ABM
#define TARGET_POPCNT OPTION_ISA_POPCNT
@@ -400,6 +402,7 @@ enum ix86_arch_indices {
X86_ARCH_CMPXCHG8B,
X86_ARCH_XADD,
X86_ARCH_BSWAP,
+ X86_ARCH_CALL_ESP,
X86_ARCH_LAST
};
@@ -411,6 +414,7 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
#define TARGET_CMPXCHG8B ix86_arch_features[X86_ARCH_CMPXCHG8B]
#define TARGET_XADD ix86_arch_features[X86_ARCH_XADD]
#define TARGET_BSWAP ix86_arch_features[X86_ARCH_BSWAP]
+#define TARGET_CALL_ESP ix86_arch_features[X86_ARCH_CALL_ESP]
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
@@ -706,9 +710,7 @@ enum target_cpu_default
generate an alternate prologue and epilogue that realigns the
runtime stack if necessary. This supports mixing codes that keep a
4-byte aligned stack, as specified by i386 psABI, with codes that
- need a 16-byte aligned stack, as required by SSE instructions. If
- STACK_REALIGN_DEFAULT is 1 and PREFERRED_STACK_BOUNDARY_DEFAULT is
- 128, stacks for all functions may be realigned. */
+ need a 16-byte aligned stack, as required by SSE instructions. */
#define STACK_REALIGN_DEFAULT 0
/* Boundary (in *bits*) on which the incoming stack is aligned. */
@@ -873,6 +875,9 @@ enum target_cpu_default
|| ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
|| (MODE) == XFmode)
+/* Cover class containing the stack registers. */
+#define STACK_REG_COVER_CLASS FLOAT_REGS
+
/* Number of actual hardware registers.
The hardware registers are assigned numbers for the compiler
from 0 to just below FIRST_PSEUDO_REGISTER.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 27800804eca..82f5352597c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -57,6 +57,7 @@
;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts
+;; Y -- print condition for XOP pcom* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
@@ -199,6 +200,15 @@
(UNSPEC_FMA4_INTRINSIC 150)
(UNSPEC_FMA4_FMADDSUB 151)
(UNSPEC_FMA4_FMSUBADD 152)
+ (UNSPEC_XOP_UNSIGNED_CMP 151)
+ (UNSPEC_XOP_TRUEFALSE 152)
+ (UNSPEC_XOP_PERMUTE 153)
+ (UNSPEC_FRCZ 154)
+ (UNSPEC_LLWP_INTRINSIC 155)
+ (UNSPEC_SLWP_INTRINSIC 156)
+ (UNSPECV_LWPVAL_INTRINSIC 157)
+ (UNSPECV_LWPINS_INTRINSIC 158)
+
; For AES support
(UNSPEC_AESENC 159)
(UNSPEC_AESENCLAST 160)
@@ -241,6 +251,7 @@
(UNSPECV_RDTSC 18)
(UNSPECV_RDTSCP 19)
(UNSPECV_RDPMC 20)
+ (UNSPECV_VSWAPMOV 21)
])
;; Constants to represent pcomtrue/pcomfalse variants
@@ -253,6 +264,20 @@
(COM_TRUE_P 5)
])
+;; Constants used in the XOP pperm instruction
+(define_constants
+ [(PPERM_SRC 0x00) /* copy source */
+ (PPERM_INVERT 0x20) /* invert source */
+ (PPERM_REVERSE 0x40) /* bit reverse source */
+ (PPERM_REV_INV 0x60) /* bit reverse & invert src */
+ (PPERM_ZERO 0x80) /* all 0's */
+ (PPERM_ONES 0xa0) /* all 1's */
+ (PPERM_SIGN 0xc0) /* propagate sign bit */
+ (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
+ (PPERM_SRC1 0x00) /* use first source byte */
+ (PPERM_SRC2 0x10) /* use second source byte */
+ ])
+
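A hedged reading of the encodings above, as a sketch:

    ;; selector byte = operation | source half | byte index, so e.g.
    ;;   (PPERM_REVERSE | PPERM_SRC2 | 5) = 0x55: byte 5 of the second
    ;;   source, bit-reversed.  Bits 0-3 index a byte, bit 4 picks the
    ;;   source half, bits 5-7 pick the operation.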
;; Registers by name.
(define_constants
[(AX_REG 0)
@@ -332,7 +357,7 @@
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
- ssemuladd,sse4arg,
+ ssemuladd,sse4arg,lwp,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -702,12 +727,52 @@
;; Base name for x87 insn mnemonic.
(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
+;; Used in signed and unsigned widening multiplications.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; Used in signed and unsigned divisions.
+(define_code_iterator any_div [div udiv])
+
+;; Various insn prefixes for signed and unsigned operations.
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+ (div "") (udiv "u")])
+(define_code_attr s [(sign_extend "s") (zero_extend "u")])
+
+;; Instruction prefix for signed and unsigned operations.
+(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
+ (div "i") (udiv "")])
+
;; All single word integer modes.
(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
;; Single word integer modes without QImode.
(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+;; Single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
+
+;; All math-dependent single and double word integer modes.
+(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
+ (HI "TARGET_HIMODE_MATH")
+ SI DI (TI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes.
+(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
+ (HI "TARGET_HIMODE_MATH")
+ SI (DI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes without QImode.
+(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
+ SI (DI "TARGET_64BIT")])
+
+;; Half mode for double word integer modes.
+(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
+ (DI "TARGET_64BIT")])
+
+;; Double word integer modes.
+(define_mode_attr DWI [(SI "DI") (DI "TI")])
+(define_mode_attr dwi [(SI "di") (DI "ti")])
+
;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
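For orientation (an editorial note, not part of the patch): a mode iterator turns one template into several concrete patterns, with per-mode conditions gating which instances exist. A sketch using the SWI48 iterator defined above (the pattern name is hypothetical):

    ;; (define_insn "neg<mode>2" [... (neg:SWI48 ...)] ...) instantiates
    ;; "negsi2" unconditionally and "negdi2" only when TARGET_64BIT;
    ;; <MODE>, <mode> and attrs such as <imodesuffix> substitute per instance.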
@@ -717,12 +782,19 @@
;; Immediate operand constraint for integer modes.
(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
+;; General operand constraint for word modes.
+(define_mode_attr g [(SI "g") (DI "rme")])
+
+;; Immediate operand constraint for double integer modes.
+(define_mode_attr di [(SI "iF") (DI "e")])
+
;; General operand predicate for integer modes.
(define_mode_attr general_operand
[(QI "general_operand")
(HI "general_operand")
(SI "general_operand")
- (DI "x86_64_general_operand")])
+ (DI "x86_64_general_operand")
+ (TI "x86_64_general_operand")])
;; SSE and x87 SFmode and DFmode floating point modes
(define_mode_iterator MODEF [SF DF])
@@ -752,7 +824,6 @@
;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
-
;; Scheduling descriptions
@@ -2994,7 +3065,6 @@
[(set (match_dup 0)
(match_dup 2))])
-
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:SF 0 "push_operand" "")
@@ -4040,9 +4110,11 @@
&& (TARGET_ZERO_EXTEND_WITH_AND
&& optimize_function_for_speed_p (cfun))
&& !reg_overlap_mentioned_p (operands[0], operands[1])"
- [(set (match_dup 0) (const_int 0))
- (set (strict_low_part (match_dup 2)) (match_dup 1))]
- "operands[2] = gen_lowpart (QImode, operands[0]);")
+ [(set (strict_low_part (match_dup 2)) (match_dup 1))]
+{
+ operands[2] = gen_lowpart (QImode, operands[0]);
+ ix86_expand_clear (operands[0]);
+})
;; Rest is handled by single and.
(define_split
@@ -4072,7 +4144,7 @@
[(set_attr "type" "alu1")
(set_attr "mode" "SI")])
-(define_insn "*zero_extendqisi2_movzbw_and"
+(define_insn "*zero_extendqisi2_movzbl_and"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
(clobber (reg:CC FLAGS_REG))]
@@ -4081,7 +4153,7 @@
[(set_attr "type" "imovx,alu1")
(set_attr "mode" "SI")])
-(define_insn "*zero_extendqisi2_movzbw"
+(define_insn "*zero_extendqisi2_movzbl"
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
"(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
@@ -4112,9 +4184,11 @@
&& (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
&& (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
&& !reg_overlap_mentioned_p (operands[0], operands[1])"
- [(set (match_dup 0) (const_int 0))
- (set (strict_low_part (match_dup 2)) (match_dup 1))]
- "operands[2] = gen_lowpart (QImode, operands[0]);")
+ [(set (strict_low_part (match_dup 2)) (match_dup 1))]
+{
+ operands[2] = gen_lowpart (QImode, operands[0]);
+ ix86_expand_clear (operands[0]);
+})
;; Rest is handled by single and.
(define_split
@@ -5426,11 +5500,18 @@
&& !X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode))
{
rtx reg = gen_reg_rtx (XFmode);
+ rtx insn;
+
emit_insn (gen_float<SSEMODEI24:mode>xf2 (reg, operands[1]));
-/* Avoid references to nonexistent function in dead code in XFmode case. */
-#define gen_truncxfxf2 gen_truncxfdf2
- emit_insn (gen_truncxf<X87MODEF:mode>2 (operands[0], reg));
-#undef gen_truncxfxf2
+
+ if (<X87MODEF:MODE>mode == SFmode)
+ insn = gen_truncxfsf2 (operands[0], reg);
+ else if (<X87MODEF:MODE>mode == DFmode)
+ insn = gen_truncxfdf2 (operands[0], reg);
+ else
+ gcc_unreachable ();
+
+ emit_insn (insn);
DONE;
}
}")
@@ -6046,195 +6127,57 @@
;; Add instructions
-;; %%% splits for addditi3
-
-(define_expand "addti3"
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
- (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
- (match_operand:TI 2 "x86_64_general_operand" "")))]
- "TARGET_64BIT"
- "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
-
-(define_insn "*addti3_1"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
- (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
- (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
- "#")
-
-(define_split
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
- (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
- (match_operand:TI 2 "x86_64_general_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
- UNSPEC_ADD_CARRY))
- (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))])
- (parallel [(set (match_dup 3)
- (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 4))
- (match_dup 5)))
- (clobber (reg:CC FLAGS_REG))])]
- "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
-
-;; %%% splits for addsidi3
-; [(set (match_operand:DI 0 "nonimmediate_operand" "")
-; (plus:DI (match_operand:DI 1 "general_operand" "")
-; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))]
-
-(define_expand "adddi3"
- [(set (match_operand:DI 0 "nonimmediate_operand" "")
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
- (match_operand:DI 2 "x86_64_general_operand" "")))]
+(define_expand "add<mode>3"
+ [(set (match_operand:SDWIM 0 "nonimmediate_operand" "")
+ (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")
+ (match_operand:SDWIM 2 "<general_operand>" "")))]
""
- "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;")
+ "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
-(define_insn "*adddi3_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
- (match_operand:DI 2 "general_operand" "roiF,riF")))
+(define_insn_and_split "*add<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ (plus:<DWI>
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
+ (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>")))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
- "#")
-
-(define_split
- [(set (match_operand:DI 0 "nonimmediate_operand" "")
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
- (match_operand:DI 2 "general_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
- UNSPEC_ADD_CARRY))
- (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+ "#"
+ "reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0)
+ (plus:DWIH (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
- (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 4))
- (match_dup 5)))
+ (plus:DWIH
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 4))
+ (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 3, &operands[0], &operands[3]);")
+ "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);")
-(define_insn "adddi3_carry_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
- (match_operand:DI 1 "nonimmediate_operand" "%0,0"))
- (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+(define_insn "add<mode>3_carry"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plus:SWI
+ (plus:SWI (match_operand:SWI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
- "adc{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "DI")])
-
-(define_insn "*adddi3_cc_rex64"
- [(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (plus:DI (match_dup 1) (match_dup 2)))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
- "add{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "DI")])
-
-(define_insn "*<plusminus_insn><mode>3_cc_overflow"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
- (match_dup 1)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
- (plusminus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*add<mode>3_cconly_overflow"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0")
- (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
- (match_dup 1)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
"ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
- "add{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*sub<mode>3_cconly_overflow"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
- (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
- (match_dup 0)))]
- ""
- "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
- [(set_attr "type" "icmp")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
- (match_operand:SI 2 "general_operand" "g"))
- (match_dup 1)))
- (set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "SI")])
-
-(define_insn "addqi3_carry"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
- (match_operand:QI 1 "nonimmediate_operand" "%0,0"))
- (match_operand:QI 2 "general_operand" "qn,qm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, QImode, operands)"
- "adc{b}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "QI")])
-
-(define_insn "addhi3_carry"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
- (match_operand:HI 1 "nonimmediate_operand" "%0,0"))
- (match_operand:HI 2 "general_operand" "rn,rm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, HImode, operands)"
- "adc{w}\t{%2, %0|%0, %2}"
+ "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
- (set_attr "mode" "HI")])
-
-(define_insn "addsi3_carry"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
- (match_operand:SI 1 "nonimmediate_operand" "%0,0"))
- (match_operand:SI 2 "general_operand" "ri,rm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, SImode, operands)"
- "adc{l}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*addsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
- (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
- (match_operand:SI 1 "nonimmediate_operand" "%0"))
- (match_operand:SI 2 "general_operand" "g"))))
+ (zero_extend:DI
+ (plus:SI
+ (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SI 2 "general_operand" "g"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %k0|%k0, %2}"
@@ -6243,23 +6186,25 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
-(define_insn "*addsi3_cc"
+(define_insn "*add<mode>3_cc"
[(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SI 2 "general_operand" "ri,rm")]
- UNSPEC_ADD_CARRY))
- (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (plus:SI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (PLUS, SImode, operands)"
- "add{l}\t{%2, %0|%0, %2}"
+ (unspec:CC
+ [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ (plus:SWI48 (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "addqi3_cc"
[(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qn,qm")]
- UNSPEC_ADD_CARRY))
+ (unspec:CC
+ [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm")]
+ UNSPEC_ADD_CARRY))
(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(plus:QI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
@@ -6267,22 +6212,28 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
-(define_expand "addsi3"
- [(set (match_operand:SI 0 "nonimmediate_operand" "")
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
- (match_operand:SI 2 "general_operand" "")))]
- ""
- "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")
+(define_insn "*add<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+ (match_dup 1)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
(define_insn "*lea_1"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (match_operand:SI 1 "no_seg_address_operand" "p"))]
- "!TARGET_64BIT"
- "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set (match_operand:DWIH 0 "register_operand" "=r")
+ (match_operand:DWIH 1 "no_seg_address_operand" "p"))]
+ ""
+ "lea{<imodesuffix>}\t{%a1, %0|%0, %a1}"
[(set_attr "type" "lea")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*lea_1_rex64"
+(define_insn "*lea_2"
[(set (match_operand:SI 0 "register_operand" "=r")
(subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))]
"TARGET_64BIT"
@@ -6290,227 +6241,60 @@
[(set_attr "type" "lea")
(set_attr "mode" "SI")])
-(define_insn "*lea_1_zext"
+(define_insn "*lea_2_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
"TARGET_64BIT"
"lea{l}\t{%a1, %k0|%k0, %a1}"
[(set_attr "type" "lea")
(set_attr "mode" "SI")])
-(define_insn "*lea_2_rex64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (match_operand:DI 1 "no_seg_address_operand" "p"))]
- "TARGET_64BIT"
- "lea{q}\t{%a1, %0|%0, %a1}"
- [(set_attr "type" "lea")
- (set_attr "mode" "DI")])
-
-;; The lea patterns for non-Pmodes needs to be matched by several
-;; insns converted to real lea by splitters.
-
-(define_insn_and_split "*lea_general_1"
- [(set (match_operand 0 "register_operand" "=r")
- (plus (plus (match_operand 1 "index_register_operand" "l")
- (match_operand 2 "register_operand" "r"))
- (match_operand 3 "immediate_operand" "i")))]
- "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
- || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
- && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && GET_MODE (operands[0]) == GET_MODE (operands[1])
- && GET_MODE (operands[0]) == GET_MODE (operands[2])
- && (GET_MODE (operands[0]) == GET_MODE (operands[3])
- || GET_MODE (operands[3]) == VOIDmode)"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- rtx pat;
- operands[0] = gen_lowpart (SImode, operands[0]);
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = gen_lowpart (Pmode, operands[2]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
- pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
- operands[3]);
- if (Pmode != SImode)
- pat = gen_rtx_SUBREG (SImode, pat, 0);
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
- DONE;
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn_and_split "*lea_general_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
- (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l")
- (match_operand:SI 2 "register_operand" "r"))
- (match_operand:SI 3 "immediate_operand" "i"))))]
- "TARGET_64BIT"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
- (match_dup 2))
- (match_dup 3)) 0)))]
-{
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = gen_lowpart (Pmode, operands[2]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn_and_split "*lea_general_2"
- [(set (match_operand 0 "register_operand" "=r")
- (plus (mult (match_operand 1 "index_register_operand" "l")
- (match_operand 2 "const248_operand" "i"))
- (match_operand 3 "nonmemory_operand" "ri")))]
- "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
- || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
- && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && GET_MODE (operands[0]) == GET_MODE (operands[1])
- && (GET_MODE (operands[0]) == GET_MODE (operands[3])
- || GET_MODE (operands[3]) == VOIDmode)"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- rtx pat;
- operands[0] = gen_lowpart (SImode, operands[0]);
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
- pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
- operands[3]);
- if (Pmode != SImode)
- pat = gen_rtx_SUBREG (SImode, pat, 0);
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
- DONE;
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn_and_split "*lea_general_2_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
- (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l")
- (match_operand:SI 2 "const248_operand" "n"))
- (match_operand:SI 3 "nonmemory_operand" "ri"))))]
- "TARGET_64BIT"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
- (match_dup 2))
- (match_dup 3)) 0)))]
-{
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn_and_split "*lea_general_3"
- [(set (match_operand 0 "register_operand" "=r")
- (plus (plus (mult (match_operand 1 "index_register_operand" "l")
- (match_operand 2 "const248_operand" "i"))
- (match_operand 3 "register_operand" "r"))
- (match_operand 4 "immediate_operand" "i")))]
- "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
- || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
- && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && GET_MODE (operands[0]) == GET_MODE (operands[1])
- && GET_MODE (operands[0]) == GET_MODE (operands[3])"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- rtx pat;
- operands[0] = gen_lowpart (SImode, operands[0]);
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
- operands[4] = gen_lowpart (Pmode, operands[4]);
- pat = gen_rtx_PLUS (Pmode,
- gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
- operands[2]),
- operands[3]),
- operands[4]);
- if (Pmode != SImode)
- pat = gen_rtx_SUBREG (SImode, pat, 0);
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
- DONE;
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn_and_split "*lea_general_3_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
- (plus:SI (plus:SI (mult:SI
- (match_operand:SI 1 "index_register_operand" "l")
- (match_operand:SI 2 "const248_operand" "n"))
- (match_operand:SI 3 "register_operand" "r"))
- (match_operand:SI 4 "immediate_operand" "i"))))]
- "TARGET_64BIT"
- "#"
- "&& reload_completed"
- [(set (match_dup 0)
- (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
- (match_dup 2))
- (match_dup 3))
- (match_dup 4)) 0)))]
-{
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[3] = gen_lowpart (Pmode, operands[3]);
- operands[4] = gen_lowpart (Pmode, operands[4]);
-}
- [(set_attr "type" "lea")
- (set_attr "mode" "SI")])
-
-(define_insn "*adddi_1_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
- (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
+(define_insn "*add<mode>_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,0,l<i>")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{q}\t{%a2, %0|%0, %a2}";
+ return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
case TYPE_INCDEC:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{q}\t%0";
+ return "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{q}\t%0";
+ return "dec{<imodesuffix>}\t%0";
}
default:
/* Use add as much as possible to replace lea for AGU optimization. */
if (which_alternative == 2 && TARGET_OPT_AGU)
- return "add{q}\t{%1, %0|%0, %1}";
+ return "add{<imodesuffix>}\t{%1, %0|%0, %1}";
gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
/* Avoid overflows. */
- && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (<MODE>mode != DImode
+ || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{q}\t{%2, %0|%0, %2}";
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- return "add{q}\t{%2, %0|%0, %2}";
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
@@ -6521,9 +6305,9 @@
(const_string "lea")
; Current assemblers are broken and do not allow @GOTOFF in
	      ; anything but a memory context.
- (match_operand:DI 2 "pic_symbolic_operand" "")
+ (match_operand:SWI48 2 "pic_symbolic_operand" "")
(const_string "lea")
- (match_operand:DI 2 "incdec_operand" "")
+ (match_operand:SWI48 2 "incdec_operand" "")
(const_string "incdec")
]
(const_string "alu")))
@@ -6532,122 +6316,104 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "<MODE>")])
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (plus:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed
- && ix86_lea_for_add_ok (PLUS, insn, operands)"
- [(set (match_dup 0)
- (plus:DI (match_dup 1)
- (match_dup 2)))]
- "")
+;; It may seem that a nonimmediate operand would be the proper one for
+;; operand 1.  The addsi_1 pattern allows a nonimmediate operand at that
+;; place, and we take care in ix86_binary_operator_ok not to allow two
+;; memory operands, so proper swapping will be done in reload.  This
+;; allows patterns constructed from addsi_1 to match.
-(define_insn "*adddi_2_rex64"
- [(set (reg FLAGS_REG)
- (compare
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
- (match_operand:DI 2 "x86_64_general_operand" "rme,re"))
- (const_int 0)))
- (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
- (plus:DI (match_dup 1) (match_dup 2)))]
- "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, DImode, operands)
- /* Current assemblers are broken and do not allow @GOTOFF in
- ought but a memory context. */
- && ! pic_symbolic_operand (operands[2], VOIDmode)"
+(define_insn "*addsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SI 2 "general_operand" "g,li"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
{
switch (get_attr_type (insn))
{
+ case TYPE_LEA:
+ operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
+ return "lea{l}\t{%a2, %k0|%k0, %a2}";
+
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{q}\t%0";
+ return "inc{l}\t%k0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{q}\t%0";
+ return "dec{l}\t%k0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* ???? We ought to handle there the 32bit case too
- - do we need new constraint? */
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
- /* Avoid overflows. */
- && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{q}\t{%2, %0|%0, %2}";
+ return "sub{l}\t{%2, %k0|%k0, %2}";
}
- return "add{q}\t{%2, %0|%0, %2}";
+ return "add{l}\t{%2, %k0|%k0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:DI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu")))
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ ; Current assemblers are broken and do not allow @GOTOFF in
+	      ; anything but a memory context.
+ (match_operand:SI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "SI")])
-(define_insn "*adddi_3_rex64"
- [(set (reg FLAGS_REG)
- (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme"))
- (match_operand:DI 1 "x86_64_general_operand" "%0")))
- (clobber (match_scratch:DI 0 "=r"))]
- "TARGET_64BIT
- && ix86_match_ccmode (insn, CCZmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))
- /* Current assemblers are broken and do not allow @GOTOFF in
- ought but a memory context. */
- && ! pic_symbolic_operand (operands[2], VOIDmode)"
+(define_insn "*addhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{q}\t%0";
+ return "inc{w}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{q}\t%0";
+ return "dec{w}\t%0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* ???? We ought to handle there the 32bit case too
- - do we need new constraint? */
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
- /* Avoid overflows. */
- && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{q}\t{%2, %0|%0, %2}";
- }
- return "add{q}\t{%2, %0|%0, %2}";
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -6655,105 +6421,102 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "HI")])
-; For comparisons against 1, -1 and 128, we may generate better code
-; by converting cmp to add, inc or dec as done by peephole2. This pattern
-; is matched then. We can't accept general immediate, because for
-; case of overflows, the result is messed up.
-; This pattern also don't hold of 0x8000000000000000, since the value overflows
-; when negated.
-; Also carry flag is reversed compared to cmp, so this conversion is valid
-; only for comparisons not depending on it.
-(define_insn "*adddi_4_rex64"
- [(set (reg FLAGS_REG)
- (compare (match_operand:DI 1 "nonimmediate_operand" "0")
- (match_operand:DI 2 "x86_64_immediate_operand" "e")))
- (clobber (match_scratch:DI 0 "=rm"))]
- "TARGET_64BIT
- && ix86_match_ccmode (insn, CCGCmode)"
+;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah
+;; type optimizations enabled by define-splits. This is not important
+;; for PII, and in fact harmful because of partial register stalls.
+
+(define_insn "*addhi_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:HI 2 "general_operand" "rn,rm,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
{
switch (get_attr_type (insn))
{
+ case TYPE_LEA:
+ return "#";
case TYPE_INCDEC:
- if (operands[2] == constm1_rtx)
- return "inc{q}\t%0";
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
else
- {
- gcc_assert (operands[2] == const1_rtx);
- return "dec{q}\t%0";
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if ((INTVAL (operands[2]) == -128
- || (INTVAL (operands[2]) > 0
- && INTVAL (operands[2]) != 128))
- /* Avoid overflows. */
- && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
- return "sub{q}\t{%2, %0|%0, %2}";
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "add{q}\t{%2, %0|%0, %2}";
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:DI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu")))
+ (if_then_else (eq_attr "alternative" "2")
+ (const_string "lea")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
+ (set_attr "mode" "HI,HI,SI")])
-(define_insn "*adddi_5_rex64"
- [(set (reg FLAGS_REG)
- (compare
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
- (match_operand:DI 2 "x86_64_general_operand" "rme"))
- (const_int 0)))
- (clobber (match_scratch:DI 0 "=r"))]
- "TARGET_64BIT
- && ix86_match_ccmode (insn, CCGOCmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))
- /* Current assemblers are broken and do not allow @GOTOFF in
- ought but a memory context. */
- && ! pic_symbolic_operand (operands[2], VOIDmode)"
+(define_insn "*addqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
{
+ int widen = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{q}\t%0";
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
else
- {
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{q}\t%0";
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
- /* Avoid overflows. */
- && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{q}\t{%2, %0|%0, %2}";
- }
- return "add{q}\t{%2, %0|%0, %2}";
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -6761,184 +6524,115 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "DI")])
-
+ (set_attr "mode" "QI,QI,SI")])
-(define_insn "*addsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
- (match_operand:SI 2 "general_operand" "g,ri,0,li")))
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*addqi_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
{
+ int widen = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_LEA:
- operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{l}\t{%a2, %0|%0, %a2}";
-
+ return "#";
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{l}\t%0";
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%0";
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
}
default:
- /* Use add as much as possible to replace lea for AGU optimization. */
- if (which_alternative == 2 && TARGET_OPT_AGU)
- return "add{l}\t{%1, %0|%0, %1}";
-
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
-
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %0|%0, %2}";
- }
- return "add{l}\t{%2, %0|%0, %2}";
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (cond [(and (eq_attr "alternative" "2")
- (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
- (const_string "lea")
- (eq_attr "alternative" "3")
- (const_string "lea")
- ; Current assemblers are broken and do not allow @GOTOFF in
- ; ought but a memory context.
- (match_operand:SI 2 "pic_symbolic_operand" "")
- (const_string "lea")
- (match_operand:SI 2 "incdec_operand" "")
- (const_string "incdec")
- ]
- (const_string "alu")))
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "lea")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI")])
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
- [(set (match_operand 0 "register_operand" "")
- (plus (match_operand 1 "register_operand" "")
- (match_operand 2 "nonmemory_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
- [(const_int 0)]
-{
- rtx pat;
- /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
- may confuse gen_lowpart. */
- if (GET_MODE (operands[0]) != Pmode)
- {
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = gen_lowpart (Pmode, operands[2]);
- }
- operands[0] = gen_lowpart (SImode, operands[0]);
- pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
- if (Pmode != SImode)
- pat = gen_rtx_SUBREG (SImode, pat, 0);
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
- DONE;
-})
+ (set_attr "mode" "QI,QI,SI,SI")])
-;; It may seem that nonimmediate operand is proper one for operand 1.
-;; The addsi_1 pattern allows nonimmediate operand at that place and
-;; we take care in ix86_binary_operator_ok to not allow two memory
-;; operands so proper swapping will be done in reload. This allow
-;; patterns constructed from addsi_1 to match.
-(define_insn "addsi_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
- (match_operand:SI 2 "general_operand" "g,li"))))
+(define_insn "*addqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qnm")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
- case TYPE_LEA:
- operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
- return "lea{l}\t{%a2, %k0|%k0, %a2}";
-
case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{l}\t%k0";
+ if (operands[1] == const1_rtx)
+ return "inc{b}\t%0";
else
- {
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%k0";
+ {
+ gcc_assert (operands[1] == constm1_rtx);
+ return "dec{b}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %k0|%k0, %2}";
- }
- return "add{l}\t{%2, %k0|%k0, %2}";
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'. */
+ if (CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) < 0)
+ {
+ operands[1] = GEN_INT (-INTVAL (operands[1]));
+ return "sub{b}\t{%1, %0|%0, %1}";
+ }
+ return "add{b}\t{%1, %0|%0, %1}";
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "1")
- (const_string "lea")
- ; Current assemblers are broken and do not allow @GOTOFF in
- ; ought but a memory context.
- (match_operand:SI 2 "pic_symbolic_operand" "")
- (const_string "lea")
- (match_operand:SI 2 "incdec_operand" "")
- (const_string "incdec")
- ]
- (const_string "alu")))
- (set (attr "length_immediate")
- (if_then_else
- (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
- (const_string "1")
- (const_string "*")))
- (set_attr "mode" "SI")])
-
-;; Convert lea to the lea pattern to avoid flags dependency.
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (zero_extend:DI
- (plus:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" ""))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
- [(set (match_dup 0)
- (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
-{
- operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = gen_lowpart (Pmode, operands[2]);
-})
+ (if_then_else (match_operand:QI 1 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu1")))
+ (set (attr "memory")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (const_string "none")))
+ (set_attr "mode" "QI")])
-(define_insn "*addsi_2"
+(define_insn "*add<mode>_2"
[(set (reg FLAGS_REG)
(compare
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SI 2 "general_operand" "g,ri"))
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>"))
(const_int 0)))
- (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
- (plus:SI (match_dup 1) (match_dup 2)))]
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+ (plus:SWI48 (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, SImode, operands)
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
/* Current assemblers are broken and do not allow @GOTOFF in
      anything but a memory context. */
&& ! pic_symbolic_operand (operands[2], VOIDmode)"
@@ -6948,30 +6642,35 @@
case TYPE_INCDEC:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{l}\t%0";
+ return "inc{<imodesuffix>}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%0";
+ return "dec{<imodesuffix>}\t%0";
}
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+      /* ???? In DImode, we ought to handle the 32-bit case there too
+	 - do we need a new constraint? */
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && (<MODE>mode != DImode
+ || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %0|%0, %2}";
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
}
- return "add{l}\t{%2, %0|%0, %2}";
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -6979,7 +6678,7 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
;; See the comment for addsi_1_zext for why we use nonimmediate_operand
(define_insn "*addsi_2_zext"
@@ -7008,7 +6707,7 @@
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
@@ -7032,46 +6731,44 @@
(const_string "*")))
(set_attr "mode" "SI")])
-(define_insn "*addsi_3"
+(define_insn "*addhi_2"
[(set (reg FLAGS_REG)
- (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
- (match_operand:SI 1 "nonimmediate_operand" "%0")))
- (clobber (match_scratch:SI 0 "=r"))]
- "ix86_match_ccmode (insn, CCZmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))
- /* Current assemblers are broken and do not allow @GOTOFF in
- ought but a memory context. */
- && ! pic_symbolic_operand (operands[2], VOIDmode)"
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (plus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{l}\t%0";
+ return "inc{w}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%0";
+ return "dec{w}\t%0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %0|%0, %2}";
- }
- return "add{l}\t{%2, %0|%0, %2}";
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -7079,98 +6776,95 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI")])
+ (set_attr "mode" "HI")])
-;; See comment for addsi_1_zext why we do use nonimmediate_operand
-(define_insn "*addsi_3_zext"
+(define_insn "*addqi_2"
[(set (reg FLAGS_REG)
- (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
- (match_operand:SI 1 "nonimmediate_operand" "%0")))
- (set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
- "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
- && ix86_binary_operator_ok (PLUS, SImode, operands)
- /* Current assemblers are broken and do not allow @GOTOFF in
- ought but a memory context. */
- && ! pic_symbolic_operand (operands[2], VOIDmode)"
+ (compare
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{l}\t%k0";
+ return "inc{b}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%k0";
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'. */
if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %k0|%k0, %2}";
- }
- return "add{l}\t{%2, %k0|%k0, %2}";
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
- (set (attr "length_immediate")
- (if_then_else
- (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
- (const_string "1")
- (const_string "*")))
- (set_attr "mode" "SI")])
+ (set_attr "mode" "QI")])
-; For comparisons against 1, -1 and 128, we may generate better code
-; by converting cmp to add, inc or dec as done by peephole2. This pattern
-; is matched then. We can't accept general immediate, because for
-; case of overflows, the result is messed up.
-; This pattern also don't hold of 0x80000000, since the value overflows
-; when negated.
-; Also carry flag is reversed compared to cmp, so this conversion is valid
-; only for comparisons not depending on it.
-(define_insn "*addsi_4"
+(define_insn "*add<mode>_3"
[(set (reg FLAGS_REG)
- (compare (match_operand:SI 1 "nonimmediate_operand" "0")
- (match_operand:SI 2 "const_int_operand" "n")))
- (clobber (match_scratch:SI 0 "=rm"))]
- "ix86_match_ccmode (insn, CCGCmode)
- && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+ (compare
+ (neg:SWI48 (match_operand:SWI48 2 "<general_operand>" "<g>"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SWI48 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- if (operands[2] == constm1_rtx)
- return "inc{l}\t%0";
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{<imodesuffix>}\t%0";
else
{
- gcc_assert (operands[2] == const1_rtx);
- return "dec{l}\t%0";
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
}
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? In DImode, we ought to handle the 32-bit case there too
+	 - do we need a new constraint? */
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if ((INTVAL (operands[2]) == -128
- || (INTVAL (operands[2]) > 0
- && INTVAL (operands[2]) != 128)))
- return "sub{l}\t{%2, %0|%0, %2}";
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "add{l}\t{%2, %0|%0, %2}";
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && (<MODE>mode != DImode
+ || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -7178,17 +6872,18 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*addsi_5"
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*addsi_3_zext"
[(set (reg FLAGS_REG)
(compare
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
- (match_operand:SI 2 "general_operand" "g"))
- (const_int 0)))
- (clobber (match_scratch:SI 0 "=r"))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ (neg:SI (match_operand:SI 2 "general_operand" "g"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
/* Current assemblers are broken and do not allow @GOTOFF in
     anything but a memory context. */
&& ! pic_symbolic_operand (operands[2], VOIDmode)"
@@ -7196,18 +6891,16 @@
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return "inc{l}\t%0";
+ return "inc{l}\t%k0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return "dec{l}\t%0";
+ return "dec{l}\t%k0";
}
default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
@@ -7215,9 +6908,9 @@
&& INTVAL (operands[2]) != -128)))
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{l}\t{%2, %0|%0, %2}";
+ return "sub{l}\t{%2, %k0|%k0, %2}";
}
- return "add{l}\t{%2, %0|%0, %2}";
+ return "add{l}\t{%2, %k0|%k0, %2}";
}
}
[(set (attr "type")
@@ -7231,40 +6924,28 @@
(const_string "*")))
(set_attr "mode" "SI")])
-(define_expand "addhi3"
- [(set (match_operand:HI 0 "nonimmediate_operand" "")
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
- (match_operand:HI 2 "general_operand" "")))]
- "TARGET_HIMODE_MATH"
- "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;")
-
-;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah
-;; type optimizations enabled by define-splits. This is not important
-;; for PII, and in fact harmful because of partial register stalls.
-
-(define_insn "*addhi_1_lea"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
- (match_operand:HI 2 "general_operand" "rn,rm,ln")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_PARTIAL_REG_STALL
- && ix86_binary_operator_ok (PLUS, HImode, operands)"
+(define_insn "*addhi_3"
+ [(set (reg FLAGS_REG)
+ (compare
+ (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
switch (get_attr_type (insn))
{
- case TYPE_LEA:
- return "#";
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
else
- {
+ {
gcc_assert (operands[2] == constm1_rtx);
return "dec{w}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
@@ -7278,100 +6959,100 @@
}
}
[(set (attr "type")
- (if_then_else (eq_attr "alternative" "2")
- (const_string "lea")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu"))))
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI,HI,SI")])
+ (set_attr "mode" "HI")])
-(define_insn "*addhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
- (match_operand:HI 2 "general_operand" "rn,rm")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_PARTIAL_REG_STALL
- && ix86_binary_operator_ok (PLUS, HImode, operands)"
+(define_insn "*addqi_3"
+ [(set (reg FLAGS_REG)
+ (compare
+ (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return "inc{w}\t%0";
+ return "inc{b}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{w}\t%0";
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'. */
if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
+ && INTVAL (operands[2]) < 0)
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{w}\t{%2, %0|%0, %2}";
+ return "sub{b}\t{%2, %0|%0, %2}";
}
- return "add{w}\t{%2, %0|%0, %2}";
+ return "add{b}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
- (set (attr "length_immediate")
- (if_then_else
- (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
- (const_string "1")
- (const_string "*")))
- (set_attr "mode" "HI")])
+ (set_attr "mode" "QI")])
-(define_insn "*addhi_2"
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since the value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons not depending on it.
+
+(define_insn "*adddi_4"
[(set (reg FLAGS_REG)
(compare
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
- (match_operand:HI 2 "general_operand" "rmn,rn"))
- (const_int 0)))
- (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
- (plus:HI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, HImode, operands)"
+ (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+ (clobber (match_scratch:DI 0 "=rm"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGCmode)"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{w}\t%0";
+ if (operands[2] == constm1_rtx)
+ return "inc{q}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{w}\t%0";
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{q}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{w}\t{%2, %0|%0, %2}";
- }
- return "add{w}\t{%2, %0|%0, %2}";
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128))
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+ return "sub{q}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{q}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -7379,43 +7060,51 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI")])
+ (set_attr "mode" "DI")])
-(define_insn "*addhi_3"
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x80000000, since the value overflows
+; when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons not depending on it.
+
+(define_insn "*addsi_4"
[(set (reg FLAGS_REG)
- (compare (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
- (match_operand:HI 1 "nonimmediate_operand" "%0")))
- (clobber (match_scratch:HI 0 "=r"))]
- "ix86_match_ccmode (insn, CCZmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ (compare
+ (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:SI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{w}\t%0";
+ if (operands[2] == constm1_rtx)
+ return "inc{l}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{w}\t%0";
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{l}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{w}\t{%2, %0|%0, %2}";
- }
- return "add{w}\t{%2, %0|%0, %2}";
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128)))
+ return "sub{l}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{l}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -7423,13 +7112,15 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "HI")])
+ (set_attr "mode" "SI")])
; See comments above addsi_4 for details.
+
(define_insn "*addhi_4"
[(set (reg FLAGS_REG)
- (compare (match_operand:HI 1 "nonimmediate_operand" "0")
- (match_operand:HI 2 "const_int_operand" "n")))
+ (compare
+ (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
(clobber (match_scratch:HI 0 "=rm"))]
"ix86_match_ccmode (insn, CCGCmode)
&& (INTVAL (operands[2]) & 0xffff) != 0x8000"
@@ -7447,7 +7138,7 @@
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if ((INTVAL (operands[2]) == -128
|| (INTVAL (operands[2]) > 0
@@ -7468,138 +7159,124 @@
(const_string "*")))
(set_attr "mode" "HI")])
+; See comments above addsi_4 for details.
-(define_insn "*addhi_5"
+(define_insn "*addqi_4"
[(set (reg FLAGS_REG)
(compare
- (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
- (match_operand:HI 2 "general_operand" "rmn"))
- (const_int 0)))
- (clobber (match_scratch:HI 0 "=r"))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:QI 0 "=qm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xff) != 0x80"
{
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{w}\t%0";
+ if (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255))
+ return "inc{b}\t%0";
else
{
- gcc_assert (operands[2] == constm1_rtx);
- return "dec{w}\t%0";
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{b}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
- Exceptions: -128 encodes smaller than 128, so swap sign and op. */
- if (CONST_INT_P (operands[2])
- && (INTVAL (operands[2]) == 128
- || (INTVAL (operands[2]) < 0
- && INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{w}\t{%2, %0|%0, %2}";
- }
- return "add{w}\t{%2, %0|%0, %2}";
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+ return "sub{b}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
(if_then_else (match_operand:HI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
- (set (attr "length_immediate")
- (if_then_else
- (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
- (const_string "1")
- (const_string "*")))
- (set_attr "mode" "HI")])
-
-(define_expand "addqi3"
- [(set (match_operand:QI 0 "nonimmediate_operand" "")
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
- (match_operand:QI 2 "general_operand" "")))]
- "TARGET_QIMODE_MATH"
- "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;")
+ (set_attr "mode" "QI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
-(define_insn "*addqi_1_lea"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
- (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_PARTIAL_REG_STALL
- && ix86_binary_operator_ok (PLUS, QImode, operands)"
+(define_insn "*add<mode>_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI48 2 "<general_operand>" "<g>"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI48 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+ anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
{
- int widen = (which_alternative == 2);
switch (get_attr_type (insn))
{
- case TYPE_LEA:
- return "#";
case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (operands[2] == const1_rtx)
- return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ return "inc{<imodesuffix>}\t%0";
else
- {
- gcc_assert (operands[2] == constm1_rtx);
- return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && (<MODE>mode != DImode
+ || ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
&& (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0
&& INTVAL (operands[2]) != -128)))
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- if (widen)
- return "sub{l}\t{%2, %k0|%k0, %2}";
- else
- return "sub{b}\t{%2, %0|%0, %2}";
- }
- if (widen)
- return "add{l}\t{%k2, %k0|%k0, %k2}";
- else
- return "add{b}\t{%2, %0|%0, %2}";
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (eq_attr "alternative" "3")
- (const_string "lea")
- (if_then_else (match_operand:QI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu"))))
+ (if_then_else (match_operand:SWI48 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
(set (attr "length_immediate")
(if_then_else
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,SI,SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*addqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
- (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_PARTIAL_REG_STALL
- && ix86_binary_operator_ok (PLUS, QImode, operands)"
+(define_insn "*addhi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rmn"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
- int widen = (which_alternative == 2);
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
- return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ return "inc{w}\t%0";
else
{
gcc_assert (operands[2] == constm1_rtx);
- return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ return "dec{w}\t%0";
}
default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4,%ax'.
Exceptions: -128 encodes smaller than 128, so swap sign and op. */
if (CONST_INT_P (operands[2])
&& (INTVAL (operands[2]) == 128
@@ -7607,19 +7284,13 @@
&& INTVAL (operands[2]) != -128)))
{
operands[2] = GEN_INT (-INTVAL (operands[2]));
- if (widen)
- return "sub{l}\t{%2, %k0|%k0, %2}";
- else
- return "sub{b}\t{%2, %0|%0, %2}";
+ return "sub{w}\t{%2, %0|%0, %2}";
}
- if (widen)
- return "add{l}\t{%k2, %k0|%k0, %k2}";
- else
- return "add{b}\t{%2, %0|%0, %2}";
+ return "add{w}\t{%2, %0|%0, %2}";
}
}
[(set (attr "type")
- (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
(const_string "incdec")
(const_string "alu")))
(set (attr "length_immediate")
@@ -7627,165 +7298,7 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,SI")])
-
-(define_insn "*addqi_1_slp"
- [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
- (plus:QI (match_dup 0)
- (match_operand:QI 1 "general_operand" "qn,qnm")))
- (clobber (reg:CC FLAGS_REG))]
- "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-{
- switch (get_attr_type (insn))
- {
- case TYPE_INCDEC:
- if (operands[1] == const1_rtx)
- return "inc{b}\t%0";
- else
- {
- gcc_assert (operands[1] == constm1_rtx);
- return "dec{b}\t%0";
- }
-
- default:
- /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */
- if (CONST_INT_P (operands[1])
- && INTVAL (operands[1]) < 0)
- {
- operands[1] = GEN_INT (-INTVAL (operands[1]));
- return "sub{b}\t{%1, %0|%0, %1}";
- }
- return "add{b}\t{%1, %0|%0, %1}";
- }
-}
- [(set (attr "type")
- (if_then_else (match_operand:QI 1 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu1")))
- (set (attr "memory")
- (if_then_else (match_operand 1 "memory_operand" "")
- (const_string "load")
- (const_string "none")))
- (set_attr "mode" "QI")])
-
-(define_insn "*addqi_2"
- [(set (reg FLAGS_REG)
- (compare
- (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qmn,qn"))
- (const_int 0)))
- (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
- (plus:QI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (PLUS, QImode, operands)"
-{
- switch (get_attr_type (insn))
- {
- case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{b}\t%0";
- else
- {
- gcc_assert (operands[2] == constm1_rtx
- || (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) == 255));
- return "dec{b}\t%0";
- }
-
- default:
- /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
- if (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) < 0)
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{b}\t{%2, %0|%0, %2}";
- }
- return "add{b}\t{%2, %0|%0, %2}";
- }
-}
- [(set (attr "type")
- (if_then_else (match_operand:QI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu")))
- (set_attr "mode" "QI")])
-
-(define_insn "*addqi_3"
- [(set (reg FLAGS_REG)
- (compare (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
- (match_operand:QI 1 "nonimmediate_operand" "%0")))
- (clobber (match_scratch:QI 0 "=q"))]
- "ix86_match_ccmode (insn, CCZmode)
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-{
- switch (get_attr_type (insn))
- {
- case TYPE_INCDEC:
- if (operands[2] == const1_rtx)
- return "inc{b}\t%0";
- else
- {
- gcc_assert (operands[2] == constm1_rtx
- || (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) == 255));
- return "dec{b}\t%0";
- }
-
- default:
- /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
- if (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) < 0)
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "sub{b}\t{%2, %0|%0, %2}";
- }
- return "add{b}\t{%2, %0|%0, %2}";
- }
-}
- [(set (attr "type")
- (if_then_else (match_operand:QI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu")))
- (set_attr "mode" "QI")])
-
-; See comments above addsi_4 for details.
-(define_insn "*addqi_4"
- [(set (reg FLAGS_REG)
- (compare (match_operand:QI 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "const_int_operand" "n")))
- (clobber (match_scratch:QI 0 "=qm"))]
- "ix86_match_ccmode (insn, CCGCmode)
- && (INTVAL (operands[2]) & 0xff) != 0x80"
-{
- switch (get_attr_type (insn))
- {
- case TYPE_INCDEC:
- if (operands[2] == constm1_rtx
- || (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) == 255))
- return "inc{b}\t%0";
- else
- {
- gcc_assert (operands[2] == const1_rtx);
- return "dec{b}\t%0";
- }
-
- default:
- gcc_assert (rtx_equal_p (operands[0], operands[1]));
- if (INTVAL (operands[2]) < 0)
- {
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- return "add{b}\t{%2, %0|%0, %2}";
- }
- return "sub{b}\t{%2, %0|%0, %2}";
- }
-}
- [(set (attr "type")
- (if_then_else (match_operand:HI 2 "incdec_operand" "")
- (const_string "incdec")
- (const_string "alu")))
- (set_attr "mode" "QI")])
-
+ (set_attr "mode" "HI")])
(define_insn "*addqi_5"
[(set (reg FLAGS_REG)
@@ -7811,7 +7324,7 @@
}
default:
- /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4,%al'. */
if (CONST_INT_P (operands[2])
&& INTVAL (operands[2]) < 0)
{
@@ -7827,8 +7340,7 @@
(const_string "alu")))
(set_attr "mode" "QI")])
-
-(define_insn "addqi_ext_1"
+(define_insn "*addqi_ext_1_rex64"
[(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
(const_int 8)
(const_int 8))
@@ -7837,9 +7349,9 @@
(match_operand 1 "ext_register_operand" "0")
(const_int 8)
(const_int 8))
- (match_operand:QI 2 "general_operand" "Qmn")))
+ (match_operand:QI 2 "nonmemory_operand" "Qn")))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT"
+ "TARGET_64BIT"
{
switch (get_attr_type (insn))
{
@@ -7852,7 +7364,7 @@
|| (CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == 255));
return "dec{b}\t%h0";
- }
+ }
default:
return "add{b}\t{%2, %h0|%h0, %2}";
@@ -7865,7 +7377,7 @@
(set_attr "modrm" "1")
(set_attr "mode" "QI")])
-(define_insn "*addqi_ext_1_rex64"
+(define_insn "addqi_ext_1"
[(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
(const_int 8)
(const_int 8))
@@ -7874,9 +7386,9 @@
(match_operand 1 "ext_register_operand" "0")
(const_int 8)
(const_int 8))
- (match_operand:QI 2 "nonmemory_operand" "Qn")))
+ (match_operand:QI 2 "general_operand" "Qmn")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT"
+ "!TARGET_64BIT"
{
switch (get_attr_type (insn))
{
@@ -7889,7 +7401,7 @@
|| (CONST_INT_P (operands[2])
&& INTVAL (operands[2]) == 255));
return "dec{b}\t%h0";
- }
+ }
default:
return "add{b}\t{%2, %h0|%h0, %2}";
@@ -7921,178 +7433,271 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
-;; The patterns that match these are at the end of this file.
-
-(define_expand "addxf3"
- [(set (match_operand:XF 0 "register_operand" "")
- (plus:XF (match_operand:XF 1 "register_operand" "")
- (match_operand:XF 2 "register_operand" "")))]
- "TARGET_80387"
- "")
-
-(define_expand "add<mode>3"
- [(set (match_operand:MODEF 0 "register_operand" "")
- (plus:MODEF (match_operand:MODEF 1 "register_operand" "")
- (match_operand:MODEF 2 "nonimmediate_operand" "")))]
- "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
- || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
- "")
-
-;; Subtract instructions
+;; The lea patterns for non-Pmodes need to be matched by
+;; several insns that are converted to a real lea by splitters.
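+;;
+;; A hedged illustration (operand names hypothetical): an HImode
+;; computation (plus (plus index base) 4) is matched here as "#" and,
+;; after reload, re-emitted on the low SImode/Pmode parts so that it can
+;; become a single "leal 4(%base,%index), %dest".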
-;; %%% splits for subditi3
+(define_insn_and_split "*lea_general_1"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "register_operand" "r"))
+ (match_operand 3 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[2])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
-(define_expand "subti3"
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
- (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
- (match_operand:TI 2 "x86_64_general_operand" "")))]
+(define_insn_and_split "*lea_general_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "immediate_operand" "i"))))]
"TARGET_64BIT"
- "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
-
-(define_insn "*subti3_1"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
- (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
- (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
- "#")
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
-(define_split
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
- (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
- (match_operand:TI 2 "x86_64_general_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
- (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))])
- (parallel [(set (match_dup 3)
- (minus:DI (match_dup 4)
- (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
- (clobber (reg:CC FLAGS_REG))])]
- "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
+(define_insn_and_split "*lea_general_2"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "nonmemory_operand" "ri")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
-;; %%% splits for subsidi3
+(define_insn_and_split "*lea_general_2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "nonmemory_operand" "ri"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
-(define_expand "subdi3"
- [(set (match_operand:DI 0 "nonimmediate_operand" "")
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
- (match_operand:DI 2 "x86_64_general_operand" "")))]
- ""
- "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;")
+(define_insn_and_split "*lea_general_3"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "register_operand" "r"))
+ (match_operand 4 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[3])"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+ pat = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
+ operands[2]),
+ operands[3]),
+ operands[4]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
-(define_insn "*subdi3_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
- (match_operand:DI 2 "general_operand" "roiF,riF")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
- "#")
+(define_insn_and_split "*lea_general_3_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 4 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
- [(set (match_operand:DI 0 "nonimmediate_operand" "")
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
- (match_operand:DI 2 "general_operand" "")))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT && reload_completed"
- [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
- (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
- (parallel [(set (match_dup 3)
- (minus:SI (match_dup 4)
- (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
- (match_dup 5))))
- (clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 3, &operands[0], &operands[3]);")
-
-(define_insn "subdi3_carry_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
- (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm"))))
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
- "sbb{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "DI")])
+ "TARGET_64BIT && reload_completed
+ && ix86_lea_for_add_ok (PLUS, insn, operands)"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 1)
+ (match_dup 2)))]
+ "")
-(define_insn "*subdi_1_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (plus (match_operand 1 "register_operand" "")
+ (match_operand 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
- "sub{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "DI")])
-
-(define_insn "*subdi_2_rex64"
- [(set (reg FLAGS_REG)
- (compare
- (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
- (const_int 0)))
- (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (minus:DI (match_dup 1) (match_dup 2)))]
- "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, DImode, operands)"
- "sub{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "DI")])
-
-(define_insn "*subdi_3_rex63"
- [(set (reg FLAGS_REG)
- (compare (match_operand:DI 1 "nonimmediate_operand" "0,0")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
- (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
- (minus:DI (match_dup 1) (match_dup 2)))]
- "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "DI")])
+ "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
+ [(const_int 0)]
+{
+ rtx pat;
+ /* In -fPIC mode, constructs like (const (unspec [symbol_ref]))
+ may confuse gen_lowpart. */
+ if (GET_MODE (operands[0]) != Pmode)
+ {
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ }
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
-(define_insn "subqi3_carry"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
- (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
- (match_operand:QI 2 "general_operand" "qn,qm"))))
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" ""))))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, QImode, operands)"
- "sbb{b}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "QI")])
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+})
+
+;; Subtract instructions
-(define_insn "subhi3_carry"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
- (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
- (match_operand:HI 2 "general_operand" "rn,rm"))))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, HImode, operands)"
- "sbb{w}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "mode" "HI")])
+(define_expand "sub<mode>3"
+ [(set (match_operand:SDWIM 0 "nonimmediate_operand" "")
+ (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")
+ (match_operand:SDWIM 2 "<general_operand>" "")))]
+ ""
+ "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
-(define_insn "subsi3_carry"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
- (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
- (match_operand:SI 2 "general_operand" "ri,rm"))))
+(define_insn_and_split "*sub<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ (minus:<DWI>
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+ (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sbb{l}\t{%2, %0|%0, %2}"
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "#"
+ "reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (minus:DWIH (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:DWIH
+ (match_dup 4)
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_<dwi> (&operands[0], 3, &operands[0], &operands[3]);")
+
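+; A hedged sketch (registers hypothetical): a TImode subtraction splits
+; into a low-part subtract that sets the carry and a high-part
+; subtract-with-borrow:
+;     subq %rbx, %rax
+;     sbbq %rcx, %rdx
+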
+(define_insn "sub<mode>3_carry"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (plus:SWI
+ (match_operand:SWI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "subsi3_carry_zext"
+(define_insn "*subsi3_carry_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(minus:SI (match_operand:SI 1 "register_operand" "0")
@@ -8105,22 +7710,28 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
-(define_expand "subsi3"
- [(set (match_operand:SI 0 "nonimmediate_operand" "")
- (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
- (match_operand:SI 2 "general_operand" "")))]
+(define_insn "*sub<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:SWI
+ (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+ (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 0)))]
""
- "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;")
+ "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*subsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
- (match_operand:SI 2 "general_operand" "ri,rm")))
+(define_insn "*sub<mode>_1"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %0|%0, %2}"
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*subsi_1_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -8133,19 +7744,31 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
-(define_insn "*subsi_2"
+(define_insn "*subqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "sub{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*sub<mode>_2"
[(set (reg FLAGS_REG)
(compare
- (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
- (match_operand:SI 2 "general_operand" "ri,rm"))
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
(const_int 0)))
- (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (minus:SI (match_dup 1) (match_dup 2)))]
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*subsi_2_zext"
[(set (reg FLAGS_REG)
@@ -8163,17 +7786,17 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
-(define_insn "*subsi_3"
+(define_insn "*sub<mode>_3"
[(set (reg FLAGS_REG)
- (compare (match_operand:SI 1 "nonimmediate_operand" "0,0")
- (match_operand:SI 2 "general_operand" "ri,rm")))
- (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
- (minus:SI (match_dup 1) (match_dup 2)))]
+ (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, SImode, operands)"
- "sub{l}\t{%2, %0|%0, %2}"
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*subsi_3_zext"
[(set (reg FLAGS_REG)
@@ -8187,195 +7810,95 @@
&& ix86_binary_operator_ok (MINUS, SImode, operands)"
"sub{l}\t{%2, %1|%1, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "DI")])
-
-(define_expand "subhi3"
- [(set (match_operand:HI 0 "nonimmediate_operand" "")
- (minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
- (match_operand:HI 2 "general_operand" "")))]
- "TARGET_HIMODE_MATH"
- "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;")
-
-(define_insn "*subhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
- (match_operand:HI 2 "general_operand" "rn,rm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, HImode, operands)"
- "sub{w}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "HI")])
-
-(define_insn "*subhi_2"
- [(set (reg FLAGS_REG)
- (compare
- (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
- (match_operand:HI 2 "general_operand" "rn,rm"))
- (const_int 0)))
- (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (minus:HI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, HImode, operands)"
- "sub{w}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "HI")])
-
-(define_insn "*subhi_3"
- [(set (reg FLAGS_REG)
- (compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
- (match_operand:HI 2 "general_operand" "rn,rm")))
- (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
- (minus:HI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, HImode, operands)"
- "sub{w}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "HI")])
-
-(define_expand "subqi3"
- [(set (match_operand:QI 0 "nonimmediate_operand" "")
- (minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
- (match_operand:QI 2 "general_operand" "")))]
- "TARGET_QIMODE_MATH"
- "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;")
-
-(define_insn "*subqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
- (match_operand:QI 2 "general_operand" "qn,qm")))
- (clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (MINUS, QImode, operands)"
- "sub{b}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
+ (set_attr "mode" "SI")])
-(define_insn "*subqi_1_slp"
- [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
- (minus:QI (match_dup 0)
- (match_operand:QI 1 "general_operand" "qn,qm")))
- (clobber (reg:CC FLAGS_REG))]
- "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "sub{b}\t{%1, %0|%0, %1}"
- [(set_attr "type" "alu1")
- (set_attr "mode" "QI")])
-(define_insn "*subqi_2"
- [(set (reg FLAGS_REG)
- (compare
- (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
- (match_operand:QI 2 "general_operand" "qn,qm"))
- (const_int 0)))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (minus:QI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (MINUS, QImode, operands)"
- "sub{b}\t{%2, %0|%0, %2}"
+(define_insn "*<plusminus_insn><mode>3_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 1)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plusminus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "QI")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*subqi_3"
- [(set (reg FLAGS_REG)
- (compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
- (match_operand:QI 2 "general_operand" "qn,qm")))
- (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
- (minus:QI (match_dup 1) (match_dup 2)))]
- "ix86_match_ccmode (insn, CCmode)
- && ix86_binary_operator_ok (MINUS, QImode, operands)"
- "sub{b}\t{%2, %0|%0, %2}"
+(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SI
+ (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (match_dup 1)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
- (set_attr "mode" "QI")])
+ (set_attr "mode" "SI")])
;; The patterns that match these are at the end of this file.
-(define_expand "subxf3"
+(define_expand "<plusminus_insn>xf3"
[(set (match_operand:XF 0 "register_operand" "")
- (minus:XF (match_operand:XF 1 "register_operand" "")
- (match_operand:XF 2 "register_operand" "")))]
+ (plusminus:XF
+ (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
"TARGET_80387"
"")
-(define_expand "sub<mode>3"
+(define_expand "<plusminus_insn><mode>3"
[(set (match_operand:MODEF 0 "register_operand" "")
- (minus:MODEF (match_operand:MODEF 1 "register_operand" "")
- (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ (plusminus:MODEF
+ (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
"(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"")
;; Multiply instructions
-(define_expand "muldi3"
- [(parallel [(set (match_operand:DI 0 "register_operand" "")
- (mult:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:DI 2 "x86_64_general_operand" "")))
+(define_expand "mul<mode>3"
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (mult:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "<general_operand>" "")))
(clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
+ ""
"")
-;; On AMDFAM10
-;; IMUL reg64, reg64, imm8 Direct
-;; IMUL reg64, mem64, imm8 VectorPath
-;; IMUL reg64, reg64, imm32 Direct
-;; IMUL reg64, mem64, imm32 VectorPath
-;; IMUL reg64, reg64 Direct
-;; IMUL reg64, mem64 Direct
-
-(define_insn "*muldi3_1_rex64"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
- (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:DI 2 "x86_64_general_operand" "K,e,mr")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "@
- imul{q}\t{%2, %1, %0|%0, %1, %2}
- imul{q}\t{%2, %1, %0|%0, %1, %2}
- imul{q}\t{%2, %0|%0, %2}"
- [(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
- (set (attr "athlon_decode")
- (cond [(eq_attr "cpu" "athlon")
- (const_string "vector")
- (eq_attr "alternative" "1")
- (const_string "vector")
- (and (eq_attr "alternative" "2")
- (match_operand 1 "memory_operand" ""))
- (const_string "vector")]
- (const_string "direct")))
- (set (attr "amdfam10_decode")
- (cond [(and (eq_attr "alternative" "0,1")
- (match_operand 1 "memory_operand" ""))
- (const_string "vector")]
- (const_string "direct")))
- (set_attr "mode" "DI")])
-
-(define_expand "mulsi3"
- [(parallel [(set (match_operand:SI 0 "register_operand" "")
- (mult:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "general_operand" "")))
+(define_expand "mulqi3"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonimmediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- ""
+ "TARGET_QIMODE_MATH"
"")
;; On AMDFAM10
-;; IMUL reg32, reg32, imm8 Direct
-;; IMUL reg32, mem32, imm8 VectorPath
-;; IMUL reg32, reg32, imm32 Direct
-;; IMUL reg32, mem32, imm32 VectorPath
-;; IMUL reg32, reg32 Direct
-;; IMUL reg32, mem32 Direct
-
-(define_insn "*mulsi3_1"
- [(set (match_operand:SI 0 "register_operand" "=r,r,r")
- (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SI 2 "general_operand" "K,i,mr")))
+;; IMUL reg32/64, reg32/64, imm8 Direct
+;; IMUL reg32/64, mem32/64, imm8 VectorPath
+;; IMUL reg32/64, reg32/64, imm32 Direct
+;; IMUL reg32/64, mem32/64, imm32 VectorPath
+;; IMUL reg32/64, reg32/64 Direct
+;; IMUL reg32/64, mem32/64 Direct
+
+(define_insn "*mul<mode>3_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r,r")
+ (mult:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SWI48 2 "<general_operand>" "K,<i>,mr")))
(clobber (reg:CC FLAGS_REG))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
- imul{l}\t{%2, %1, %0|%0, %1, %2}
- imul{l}\t{%2, %1, %0|%0, %1, %2}
- imul{l}\t{%2, %0|%0, %2}"
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ imul{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "imul")
(set_attr "prefix_0f" "0,0,1")
(set (attr "athlon_decode")
@@ -8392,7 +7915,7 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
- (set_attr "mode" "SI")])
+ (set_attr "mode" "<MODE>")])
(define_insn "*mulsi3_1_zext"
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
@@ -8424,14 +7947,6 @@
(const_string "direct")))
(set_attr "mode" "SI")])
-(define_expand "mulhi3"
- [(parallel [(set (match_operand:HI 0 "register_operand" "")
- (mult:HI (match_operand:HI 1 "register_operand" "")
- (match_operand:HI 2 "general_operand" "")))
- (clobber (reg:CC FLAGS_REG))])]
- "TARGET_HIMODE_MATH"
- "")
-
;; On AMDFAM10
;; IMUL reg16, reg16, imm8 VectorPath
;; IMUL reg16, mem16, imm8 VectorPath
@@ -8439,12 +7954,14 @@
;; IMUL reg16, mem16, imm16 VectorPath
;; IMUL reg16, reg16 Direct
;; IMUL reg16, mem16 Direct
+
(define_insn "*mulhi3_1"
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
(match_operand:HI 2 "general_operand" "K,n,mr")))
(clobber (reg:CC FLAGS_REG))]
- "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "TARGET_HIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
imul{w}\t{%2, %1, %0|%0, %1, %2}
imul{w}\t{%2, %1, %0|%0, %1, %2}
@@ -8463,14 +7980,6 @@
(const_string "direct")))
(set_attr "mode" "HI")])
-(define_expand "mulqi3"
- [(parallel [(set (match_operand:QI 0 "register_operand" "")
- (mult:QI (match_operand:QI 1 "nonimmediate_operand" "")
- (match_operand:QI 2 "register_operand" "")))
- (clobber (reg:CC FLAGS_REG))])]
- "TARGET_QIMODE_MATH"
- "")
-
;; On AMDFAM10
;; MUL reg8 Direct
;; MUL mem8 Direct
@@ -8492,131 +8001,38 @@
(set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
-(define_expand "umulqihi3"
- [(parallel [(set (match_operand:HI 0 "register_operand" "")
- (mult:HI (zero_extend:HI
- (match_operand:QI 1 "nonimmediate_operand" ""))
- (zero_extend:HI
- (match_operand:QI 2 "register_operand" ""))))
+(define_expand "<u>mul<mode><dwi>3"
+ [(parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" ""))
+ (any_extend:<DWI>
+ (match_operand:DWIH 2 "register_operand" ""))))
(clobber (reg:CC FLAGS_REG))])]
- "TARGET_QIMODE_MATH"
+ ""
"")
-(define_insn "*umulqihi3_1"
- [(set (match_operand:HI 0 "register_operand" "=a")
- (mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
- (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_QIMODE_MATH
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{b}\t%2"
- [(set_attr "type" "imul")
- (set_attr "length_immediate" "0")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "direct")))
- (set_attr "amdfam10_decode" "direct")
- (set_attr "mode" "QI")])
-
-(define_expand "mulqihi3"
+(define_expand "<u>mulqihi3"
[(parallel [(set (match_operand:HI 0 "register_operand" "")
- (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))
- (sign_extend:HI (match_operand:QI 2 "register_operand" ""))))
+ (mult:HI
+ (any_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (any_extend:HI
+ (match_operand:QI 2 "register_operand" ""))))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_QIMODE_MATH"
"")
-(define_insn "*mulqihi3_insn"
- [(set (match_operand:HI 0 "register_operand" "=a")
- (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
- (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_QIMODE_MATH
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{b}\t%2"
- [(set_attr "type" "imul")
- (set_attr "length_immediate" "0")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "direct")))
- (set_attr "amdfam10_decode" "direct")
- (set_attr "mode" "QI")])
-
-(define_expand "umulditi3"
- [(parallel [(set (match_operand:TI 0 "register_operand" "")
- (mult:TI (zero_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" ""))
- (zero_extend:TI
- (match_operand:DI 2 "register_operand" ""))))
- (clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
- "")
-
-(define_insn "*umulditi3_insn"
- [(set (match_operand:TI 0 "register_operand" "=A")
- (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
- (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{q}\t%2"
- [(set_attr "type" "imul")
- (set_attr "length_immediate" "0")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "DI")])
-
-;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
-(define_expand "umulsidi3"
- [(parallel [(set (match_operand:DI 0 "register_operand" "")
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (zero_extend:DI
- (match_operand:SI 2 "register_operand" ""))))
- (clobber (reg:CC FLAGS_REG))])]
- "!TARGET_64BIT"
- "")
-
-(define_insn "*umulsidi3_insn"
- [(set (match_operand:DI 0 "register_operand" "=A")
- (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
- (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
- (clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{l}\t%2"
- [(set_attr "type" "imul")
- (set_attr "length_immediate" "0")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "SI")])
-
-(define_expand "mulditi3"
- [(parallel [(set (match_operand:TI 0 "register_operand" "")
- (mult:TI (sign_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" ""))
- (sign_extend:TI
- (match_operand:DI 2 "register_operand" ""))))
- (clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
- "")
-
-(define_insn "*mulditi3_insn"
- [(set (match_operand:TI 0 "register_operand" "=A")
- (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
- (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+(define_insn "*<u>mul<mode><dwi>3_1"
+ [(set (match_operand:<DWI> 0 "register_operand" "=A")
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
+ (any_extend:<DWI>
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{q}\t%2"
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{<imodesuffix>}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -8624,63 +8040,58 @@
(const_string "vector")
(const_string "double")))
(set_attr "amdfam10_decode" "double")
- (set_attr "mode" "DI")])
-
-(define_expand "mulsidi3"
- [(parallel [(set (match_operand:DI 0 "register_operand" "")
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (sign_extend:DI
- (match_operand:SI 2 "register_operand" ""))))
- (clobber (reg:CC FLAGS_REG))])]
- "!TARGET_64BIT"
- "")
+ (set_attr "mode" "<MODE>")])
-(define_insn "*mulsidi3_insn"
- [(set (match_operand:DI 0 "register_operand" "=A")
- (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
- (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+(define_insn "*<u>mulqihi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI
+ (any_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (any_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm"))))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_64BIT
+ "TARGET_QIMODE_MATH
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{l}\t%2"
+ "<sgnprefix>mul{b}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "SI")])
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "mode" "QI")])
-(define_expand "umuldi3_highpart"
- [(parallel [(set (match_operand:DI 0 "register_operand" "")
- (truncate:DI
- (lshiftrt:TI
- (mult:TI (zero_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" ""))
- (zero_extend:TI
- (match_operand:DI 2 "register_operand" "")))
- (const_int 64))))
- (clobber (match_scratch:DI 3 ""))
+(define_expand "<s>mul<mode>3_highpart"
+ [(parallel [(set (match_operand:SWI48 0 "register_operand" "")
+ (truncate:SWI48
+ (lshiftrt:<DWI>
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:SWI48 1 "nonimmediate_operand" ""))
+ (any_extend:<DWI>
+ (match_operand:SWI48 2 "register_operand" "")))
+ (match_dup 4))))
+ (clobber (match_scratch:SWI48 3 ""))
(clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
- "")
+ ""
+ "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
-(define_insn "*umuldi3_highpart_rex64"
+(define_insn "*<s>muldi3_highpart_1"
[(set (match_operand:DI 0 "register_operand" "=d")
(truncate:DI
(lshiftrt:TI
- (mult:TI (zero_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" "%a"))
- (zero_extend:TI
- (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (mult:TI
+ (any_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (any_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
(const_int 64))))
(clobber (match_scratch:DI 3 "=1"))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{q}\t%2"
+ "<sgnprefix>mul{q}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -8690,33 +8101,20 @@
(set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
-(define_expand "umulsi3_highpart"
- [(parallel [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (zero_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC FLAGS_REG))])]
- ""
- "")
-
-(define_insn "*umulsi3_highpart_insn"
+(define_insn "*<s>mulsi3_highpart_1"
[(set (match_operand:SI 0 "register_operand" "=d")
(truncate:SI
(lshiftrt:DI
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" "%a"))
- (zero_extend:DI
- (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (mult:DI
+ (any_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (any_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
(const_int 32))))
(clobber (match_scratch:SI 3 "=1"))
(clobber (reg:CC FLAGS_REG))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{l}\t%2"
+ "<sgnprefix>mul{l}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -8726,20 +8124,20 @@
(set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
-(define_insn "*umulsi3_highpart_zext"
+(define_insn "*<s>mulsi3_highpart_zext"
[(set (match_operand:DI 0 "register_operand" "=d")
(zero_extend:DI (truncate:SI
(lshiftrt:DI
- (mult:DI (zero_extend:DI
+ (mult:DI (any_extend:DI
(match_operand:SI 1 "nonimmediate_operand" "%a"))
- (zero_extend:DI
+ (any_extend:DI
(match_operand:SI 2 "nonimmediate_operand" "rm")))
(const_int 32)))))
(clobber (match_scratch:SI 3 "=1"))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "mul{l}\t%2"
+ "<sgnprefix>mul{l}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -8749,99 +8147,6 @@
(set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
-(define_expand "smuldi3_highpart"
- [(parallel [(set (match_operand:DI 0 "register_operand" "")
- (truncate:DI
- (lshiftrt:TI
- (mult:TI (sign_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" ""))
- (sign_extend:TI
- (match_operand:DI 2 "register_operand" "")))
- (const_int 64))))
- (clobber (match_scratch:DI 3 ""))
- (clobber (reg:CC FLAGS_REG))])]
- "TARGET_64BIT"
- "")
-
-(define_insn "*smuldi3_highpart_rex64"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (truncate:DI
- (lshiftrt:TI
- (mult:TI (sign_extend:TI
- (match_operand:DI 1 "nonimmediate_operand" "%a"))
- (sign_extend:TI
- (match_operand:DI 2 "nonimmediate_operand" "rm")))
- (const_int 64))))
- (clobber (match_scratch:DI 3 "=1"))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{q}\t%2"
- [(set_attr "type" "imul")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "DI")])
-
-(define_expand "smulsi3_highpart"
- [(parallel [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (sign_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC FLAGS_REG))])]
- ""
- "")
-
-(define_insn "*smulsi3_highpart_insn"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" "%a"))
- (sign_extend:DI
- (match_operand:SI 2 "nonimmediate_operand" "rm")))
- (const_int 32))))
- (clobber (match_scratch:SI 3 "=1"))
- (clobber (reg:CC FLAGS_REG))]
- "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{l}\t%2"
- [(set_attr "type" "imul")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "SI")])
-
-(define_insn "*smulsi3_highpart_zext"
- [(set (match_operand:DI 0 "register_operand" "=d")
- (zero_extend:DI (truncate:SI
- (lshiftrt:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" "%a"))
- (sign_extend:DI
- (match_operand:SI 2 "nonimmediate_operand" "rm")))
- (const_int 32)))))
- (clobber (match_scratch:SI 3 "=1"))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{l}\t%2"
- [(set_attr "type" "imul")
- (set (attr "athlon_decode")
- (if_then_else (eq_attr "cpu" "athlon")
- (const_string "vector")
- (const_string "double")))
- (set_attr "amdfam10_decode" "double")
- (set_attr "mode" "SI")])
-
;; The patterns that match these are at the end of this file.
(define_expand "mulxf3"
@@ -8858,27 +8163,17 @@
"(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"")
-
;; Divide instructions
-(define_insn "divqi3"
+(define_insn "<u>divqi3"
[(set (match_operand:QI 0 "register_operand" "=a")
- (div:QI (match_operand:HI 1 "register_operand" "0")
- (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (any_div:QI
+ (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_QIMODE_MATH"
- "idiv{b}\t%2"
- [(set_attr "type" "idiv")
- (set_attr "mode" "QI")])
-
-(define_insn "udivqi3"
- [(set (match_operand:QI 0 "register_operand" "=a")
- (udiv:QI (match_operand:HI 1 "register_operand" "0")
- (match_operand:QI 2 "nonimmediate_operand" "qm")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_QIMODE_MATH"
- "div{b}\t%2"
+ "<sgnprefix>div{b}\t%2"
[(set_attr "type" "idiv")
(set_attr "mode" "QI")])
@@ -8919,33 +8214,33 @@
;; Divmod instructions.
(define_expand "divmod<mode>4"
- [(parallel [(set (match_operand:SWI248 0 "register_operand" "")
- (div:SWI248
- (match_operand:SWI248 1 "register_operand" "")
- (match_operand:SWI248 2 "nonimmediate_operand" "")))
- (set (match_operand:SWI248 3 "register_operand" "")
- (mod:SWI248 (match_dup 1) (match_dup 2)))
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (div:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+ (set (match_operand:SWIM248 3 "register_operand" "")
+ (mod:SWIM248 (match_dup 1) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
- "(<MODE>mode != HImode) || TARGET_HIMODE_MATH"
+ ""
"")
(define_insn_and_split "*divmod<mode>4"
- [(set (match_operand:SWI248 0 "register_operand" "=a")
- (div:SWI248 (match_operand:SWI248 2 "register_operand" "0")
- (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
- (set (match_operand:SWI248 1 "register_operand" "=&d")
- (mod:SWI248 (match_dup 2) (match_dup 3)))
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))]
- "(<MODE>mode != HImode) || TARGET_HIMODE_MATH"
+ ""
"#"
"&& reload_completed"
[(parallel [(set (match_dup 1)
- (ashiftrt:SWI248 (match_dup 4) (match_dup 5)))
+ (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
(clobber (reg:CC FLAGS_REG))])
(parallel [(set (match_dup 0)
- (div:SWI248 (match_dup 2) (match_dup 3)))
+ (div:SWIM248 (match_dup 2) (match_dup 3)))
(set (match_dup 1)
- (mod:SWI248 (match_dup 2) (match_dup 3)))
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
(use (match_dup 1))
(clobber (reg:CC FLAGS_REG))])]
{
@@ -8965,12 +8260,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "*divmod<mode>4_noext"
- [(set (match_operand:SWI248 0 "register_operand" "=a")
- (div:SWI248 (match_operand:SWI248 2 "register_operand" "0")
- (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
- (set (match_operand:SWI248 1 "register_operand" "=d")
- (mod:SWI248 (match_dup 2) (match_dup 3)))
- (use (match_operand:SWI248 4 "register_operand" "1"))
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_operand:SWIM248 4 "register_operand" "1"))
(clobber (reg:CC FLAGS_REG))]
""
"idiv{<imodesuffix>}\t%3"
@@ -8978,31 +8273,31 @@
(set_attr "mode" "<MODE>")])
(define_expand "udivmod<mode>4"
- [(parallel [(set (match_operand:SWI248 0 "register_operand" "")
- (udiv:SWI248
- (match_operand:SWI248 1 "register_operand" "")
- (match_operand:SWI248 2 "nonimmediate_operand" "")))
- (set (match_operand:SWI248 3 "register_operand" "")
- (umod:SWI248 (match_dup 1) (match_dup 2)))
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (udiv:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+ (set (match_operand:SWIM248 3 "register_operand" "")
+ (umod:SWIM248 (match_dup 1) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
- "(<MODE>mode != HImode) || TARGET_HIMODE_MATH"
+ ""
"")
(define_insn_and_split "*udivmod<mode>4"
- [(set (match_operand:SWI248 0 "register_operand" "=a")
- (udiv:SWI248 (match_operand:SWI248 2 "register_operand" "0")
- (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
- (set (match_operand:SWI248 1 "register_operand" "=&d")
- (umod:SWI248 (match_dup 2) (match_dup 3)))
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))]
- "(<MODE>mode != HImode) || TARGET_HIMODE_MATH"
+ ""
"#"
"&& reload_completed"
[(set (match_dup 1) (const_int 0))
(parallel [(set (match_dup 0)
- (udiv:SWI248 (match_dup 2) (match_dup 3)))
+ (udiv:SWIM248 (match_dup 2) (match_dup 3)))
(set (match_dup 1)
- (umod:SWI248 (match_dup 2) (match_dup 3)))
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
(use (match_dup 1))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -9010,14 +8305,14 @@
(set_attr "mode" "<MODE>")])
(define_insn "*udivmod<mode>4_noext"
- [(set (match_operand:SWI248 0 "register_operand" "=a")
- (udiv:SWI248 (match_operand:SWI248 2 "register_operand" "0")
- (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
- (set (match_operand:SWI248 1 "register_operand" "=d")
- (umod:SWI248 (match_dup 2) (match_dup 3)))
- (use (match_operand:SWI248 4 "register_operand" "1"))
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_operand:SWIM248 4 "register_operand" "1"))
(clobber (reg:CC FLAGS_REG))]
- "(<MODE>mode != HImode) || TARGET_HIMODE_MATH"
+ ""
"div{<imodesuffix>}\t%3"
[(set_attr "type" "idiv")
(set_attr "mode" "<MODE>")])
@@ -9301,7 +8596,8 @@
else if (GET_CODE (val) == SUBREG
&& (submode = GET_MODE (SUBREG_REG (val)),
GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
- && pos + len <= GET_MODE_BITSIZE (submode))
+ && pos + len <= GET_MODE_BITSIZE (submode)
+ && GET_MODE_CLASS (submode) == MODE_INT)
{
/* Narrow a paradoxical subreg to prevent partial register stalls. */
mode = submode;
@@ -14505,11 +13801,53 @@
;; For all sCOND expanders, also expand the compare or test insn that
;; generates cc0. Generate an equality comparison if `seq' or `sne'.
-;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way
-;; to avoid partial register stalls. Otherwise do things the setcc+movzx
-;; way, which can later delete the movzx if only QImode is needed.
+(define_insn_and_split "*setcc_di_1"
+ [(set (match_operand:DI 0 "register_operand" "=q")
+ (match_operator:DI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+(define_insn_and_split "*setcc_si_1_and"
+ [(set (match_operand:SI 0 "register_operand" "=q")
+ (match_operator:SI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+(define_insn_and_split "*setcc_si_1_movzbl"
+ [(set (match_operand:SI 0 "register_operand" "=q")
+ (match_operator:SI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ "!TARGET_PARTIAL_REG_STALL
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
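+
+;; Illustration (plain C, not part of this patch): after reload these
+;; splitters turn "int lt (int a, int b) { return a < b; }" into
+;;   cmpl %esi, %edi
+;;   setl %al
+;;   movzbl %al, %eax
+;; avoiding a partial-register stall on the QImode setcc result.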
-(define_insn "*setcc_1"
+(define_insn "*setcc_qi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(match_operator:QI 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))]
@@ -14518,7 +13856,7 @@
[(set_attr "type" "setcc")
(set_attr "mode" "QI")])
-(define_insn "*setcc_2"
+(define_insn "*setcc_qi_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
(match_operator:QI 1 "ix86_comparison_operator"
[(reg FLAGS_REG) (const_int 0)]))]
@@ -15252,12 +14590,25 @@
}
[(set_attr "type" "call")])
-(define_insn "*call_pop_1"
+(define_insn "*call_pop_1_esp"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
(match_operand:SI 1 "" ""))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
(match_operand:SI 2 "immediate_operand" "i")))]
- "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_pop_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "!TARGET_64BIT && !TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[0], Pmode))
return "call\t%P0";
@@ -15270,7 +14621,7 @@
(match_operand:SI 1 "" ""))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
(match_operand:SI 2 "immediate_operand" "i,i")))]
- "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P0
jmp\t%A0"
@@ -15308,10 +14659,21 @@
}
[(set_attr "type" "call")])
-(define_insn "*call_1"
+(define_insn "*call_1_esp"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
- "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+ (match_operand 1 "" ""))]
+ "!TARGET_64BIT && !TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[0], Pmode))
return "call\t%P0";
@@ -15322,7 +14684,7 @@
(define_insn "*sibcall_1"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
- "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P0
jmp\t%A0"
@@ -15331,7 +14693,7 @@
(define_insn "*call_1_rex64"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)
&& ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
{
if (constant_call_address_operand (operands[0], Pmode))
@@ -15356,7 +14718,7 @@
(clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[0], Pmode))
return "call\t%P0";
@@ -15367,14 +14729,14 @@
(define_insn "*call_1_rex64_large"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
(match_operand 1 "" ""))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
"call\t%A0"
[(set_attr "type" "call")])
(define_insn "*sibcall_1_rex64"
[(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
- "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P0
jmp\t%A0"
@@ -15566,6 +14928,16 @@
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
+(define_insn "vswapmov"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (unspec_volatile [(const_int 0)] UNSPECV_VSWAPMOV)]
+ ""
+ "movl.s\t{%1, %0|%0, %1}"
+ [(set_attr "length" "2")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
;; Pad to 16-byte boundary, max skip in op0. Used to avoid
;; branch prediction penalty for the third jump in a 16-byte
;; block on K8.
@@ -18734,25 +18106,13 @@
&& flag_unsafe_math_optimizations"
"")
-(define_expand "lfloor<mode>di2"
- [(match_operand:DI 0 "nonimmediate_operand" "")
+(define_expand "lfloor<MODEF:mode><SWI48:mode>2"
+ [(match_operand:SWI48 0 "nonimmediate_operand" "")
(match_operand:MODEF 1 "register_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
- && !flag_trapping_math"
-{
- if (optimize_insn_for_size_p ())
- FAIL;
- ix86_expand_lfloorceil (operand0, operand1, true);
- DONE;
-})
-
-(define_expand "lfloor<mode>si2"
- [(match_operand:SI 0 "nonimmediate_operand" "")
- (match_operand:MODEF 1 "register_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math"
{
- if (optimize_insn_for_size_p () && TARGET_64BIT)
+ if (TARGET_64BIT && optimize_insn_for_size_p ())
FAIL;
ix86_expand_lfloorceil (operand0, operand1, true);
DONE;
@@ -19008,20 +18368,10 @@
&& flag_unsafe_math_optimizations"
"")
-(define_expand "lceil<mode>di2"
- [(match_operand:DI 0 "nonimmediate_operand" "")
- (match_operand:MODEF 1 "register_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
- && !flag_trapping_math"
-{
- ix86_expand_lfloorceil (operand0, operand1, false);
- DONE;
-})
-
-(define_expand "lceil<mode>si2"
- [(match_operand:SI 0 "nonimmediate_operand" "")
+(define_expand "lceil<MODEF:mode><SWI48:mode>2"
+ [(match_operand:SWI48 0 "nonimmediate_operand" "")
(match_operand:MODEF 1 "register_operand" "")]
- "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math"
{
ix86_expand_lfloorceil (operand0, operand1, false);
@@ -20168,22 +19518,26 @@
;; Conditional move instructions.
-(define_expand "movdicc"
- [(set (match_operand:DI 0 "register_operand" "")
- (if_then_else:DI (match_operand 1 "comparison_operator" "")
- (match_operand:DI 2 "general_operand" "")
- (match_operand:DI 3 "general_operand" "")))]
- "TARGET_64BIT"
+(define_expand "mov<mode>cc"
+ [(set (match_operand:SWIM 0 "register_operand" "")
+ (if_then_else:SWIM (match_operand 1 "comparison_operator" "")
+ (match_operand:SWIM 2 "general_operand" "")
+ (match_operand:SWIM 3 "general_operand" "")))]
+ ""
"if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
-(define_insn "x86_movdicc_0_m1_rex64"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "")
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
+
+(define_insn "x86_mov<mode>cc_0_m1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (if_then_else:SWI48 (match_operand 1 "ix86_carry_flag_operator" "")
(const_int -1)
(const_int 0)))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT"
- "sbb{q}\t%0, %0"
+ ""
+ "sbb{<imodesuffix>}\t%0, %0"
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
@@ -20191,134 +19545,56 @@
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
- (set_attr "mode" "DI")
- (set_attr "length_immediate" "0")])
-
-(define_insn "*x86_movdicc_0_m1_se"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "")
- (const_int 1)
- (const_int 0)))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "sbb{q}\t%0, %0"
- [(set_attr "type" "alu")
- (set_attr "use_carry" "1")
- (set_attr "pent_pair" "pu")
- (set_attr "memory" "none")
- (set_attr "imm_disp" "false")
- (set_attr "mode" "DI")
+ (set_attr "mode" "<MODE>")
(set_attr "length_immediate" "0")])
-(define_insn "*movdicc_c_rex64"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:DI 2 "nonimmediate_operand" "rm,0")
- (match_operand:DI 3 "nonimmediate_operand" "0,rm")))]
- "TARGET_64BIT && TARGET_CMOVE
- && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
- "@
- cmov%O2%C1\t{%2, %0|%0, %2}
- cmov%O2%c1\t{%3, %0|%0, %3}"
- [(set_attr "type" "icmov")
- (set_attr "mode" "DI")])
-
-(define_expand "movsicc"
- [(set (match_operand:SI 0 "register_operand" "")
- (if_then_else:SI (match_operand 1 "comparison_operator" "")
- (match_operand:SI 2 "general_operand" "")
- (match_operand:SI 3 "general_operand" "")))]
- ""
- "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
-
-;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
-;; the register first winds up with `sbbl $0,reg', which is also weird.
-;; So just document what we're doing explicitly.
-
-(define_insn "x86_movsicc_0_m1"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "")
- (const_int -1)
- (const_int 0)))
+(define_insn "*x86_mov<mode>cc_0_m1_se"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (sign_extract:SWI48 (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int 1)
+ (const_int 0)))
(clobber (reg:CC FLAGS_REG))]
""
- "sbb{l}\t%0, %0"
- ; Since we don't have the proper number of operands for an alu insn,
- ; fill in all the blanks.
+ "sbb{<imodesuffix>}\t%0, %0"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
- (set_attr "mode" "SI")
+ (set_attr "mode" "<MODE>")
(set_attr "length_immediate" "0")])
-(define_insn "*x86_movsicc_0_m1_se"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "")
- (const_int 1)
- (const_int 0)))
- (clobber (reg:CC FLAGS_REG))]
+(define_insn "*x86_mov<mode>cc_0_m1_neg"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (neg:SWI48 (match_operand 1 "ix86_carry_flag_operator" "")))]
""
- "sbb{l}\t%0, %0"
+ "sbb{<imodesuffix>}\t%0, %0"
[(set_attr "type" "alu")
(set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
- (set_attr "mode" "SI")
+ (set_attr "mode" "<MODE>")
(set_attr "length_immediate" "0")])
-(define_insn "*movsicc_noc"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:SI 2 "nonimmediate_operand" "rm,0")
- (match_operand:SI 3 "nonimmediate_operand" "0,rm")))]
- "TARGET_CMOVE
- && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+(define_insn "*mov<mode>cc_noc"
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
+ (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"@
cmov%O2%C1\t{%2, %0|%0, %2}
cmov%O2%c1\t{%3, %0|%0, %3}"
[(set_attr "type" "icmov")
- (set_attr "mode" "SI")])
-
-(define_expand "movhicc"
- [(set (match_operand:HI 0 "register_operand" "")
- (if_then_else:HI (match_operand 1 "comparison_operator" "")
- (match_operand:HI 2 "general_operand" "")
- (match_operand:HI 3 "general_operand" "")))]
- "TARGET_HIMODE_MATH"
- "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
-
-(define_insn "*movhicc_noc"
- [(set (match_operand:HI 0 "register_operand" "=r,r")
- (if_then_else:HI (match_operator 1 "ix86_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand:HI 2 "nonimmediate_operand" "rm,0")
- (match_operand:HI 3 "nonimmediate_operand" "0,rm")))]
- "TARGET_CMOVE
- && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
- "@
- cmov%O2%C1\t{%2, %0|%0, %2}
- cmov%O2%c1\t{%3, %0|%0, %3}"
- [(set_attr "type" "icmov")
- (set_attr "mode" "HI")])
-
-(define_expand "movqicc"
- [(set (match_operand:QI 0 "register_operand" "")
- (if_then_else:QI (match_operand 1 "comparison_operator" "")
- (match_operand:QI 2 "general_operand" "")
- (match_operand:QI 3 "general_operand" "")))]
- "TARGET_QIMODE_MATH"
- "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+ (set_attr "mode" "<MODE>")])
(define_insn_and_split "*movqicc_noc"
[(set (match_operand:QI 0 "register_operand" "=r,r")
(if_then_else:QI (match_operator 1 "ix86_comparison_operator"
- [(match_operand 4 "flags_reg_operand" "")
- (const_int 0)])
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
(match_operand:QI 2 "register_operand" "r,0")
(match_operand:QI 3 "register_operand" "0,r")))]
"TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
@@ -20424,6 +19700,20 @@
[(set_attr "type" "fcmov")
(set_attr "mode" "XF")])
+;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
+;; XOP conditional move
+(define_insn "*xop_pcmov_<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (if_then_else:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "register_operand" "x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+ [(set_attr "type" "sse4arg")])
+
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
@@ -20733,6 +20023,18 @@
DONE;
})
+;; Use IOR for stack probes; it is shorter and leaves the probed word unchanged.
+(define_expand "probe_stack"
+ [(match_operand 0 "memory_operand" "")]
+ ""
+{
+ if (GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx));
+ else
+ emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx));
+ DONE;
+})
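+
+;; Illustration (editorial, not from this patch): a probe of the page
+;; below the stack pointer comes out as e.g. "orl $0, -4096(%esp)",
+;; which faults exactly when that page is not yet mapped.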
+
(define_expand "builtin_setjmp_receiver"
[(label_ref (match_operand 0 "" ""))]
"!TARGET_64BIT && flag_pic"
@@ -21236,7 +20538,9 @@
[(match_dup 0)
(match_operand:SI 1 "nonmemory_operand" "")]))
(clobber (reg:CC FLAGS_REG))])]
- "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
[(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 2) (match_dup 1)]))
@@ -21251,7 +20555,9 @@
[(match_operand:SI 1 "nonmemory_operand" "")
(match_dup 0)]))
(clobber (reg:CC FLAGS_REG))])]
- "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
[(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
@@ -21856,13 +21162,27 @@
}
[(set_attr "type" "callv")])
-(define_insn "*call_value_pop_1"
+(define_insn "*call_value_pop_1_esp"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
(match_operand:SI 2 "" "")))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "i")))]
- "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "!TARGET_64BIT && !TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[1], Pmode))
return "call\t%P1";
@@ -21876,7 +21196,7 @@
(match_operand:SI 2 "" "")))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "i,i")))]
- "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P1
jmp\t%A1"
@@ -21925,7 +21245,7 @@
(clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
{
if (SIBLING_CALL_P (insn))
return "jmp\t%P1";
@@ -21934,11 +21254,23 @@
}
[(set_attr "type" "callv")])
-(define_insn "*call_value_1"
+(define_insn "*call_value_1_esp"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
(match_operand:SI 2 "" "")))]
- "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT && !TARGET_CALL_ESP && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[1], Pmode))
return "call\t%P1";
@@ -21950,7 +21282,7 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
(match_operand:SI 2 "" "")))]
- "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P1
jmp\t%A1"
@@ -21960,7 +21292,7 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)
&& ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
{
if (constant_call_address_operand (operands[1], Pmode))
@@ -21974,19 +21306,19 @@
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))
(unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
- (clobber (reg:TI 27))
- (clobber (reg:TI 28))
- (clobber (reg:TI 45))
- (clobber (reg:TI 46))
- (clobber (reg:TI 47))
- (clobber (reg:TI 48))
- (clobber (reg:TI 49))
- (clobber (reg:TI 50))
- (clobber (reg:TI 51))
- (clobber (reg:TI 52))
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[1], Pmode))
return "call\t%P1";
@@ -21998,7 +21330,7 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
(match_operand:DI 2 "" "")))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
"call\t%A1"
[(set_attr "type" "callv")])
@@ -22006,7 +21338,7 @@
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
(match_operand:DI 2 "" "")))]
- "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && SIBLING_CALL_P (insn)"
"@
jmp\t%P1
jmp\t%A1"
@@ -22025,14 +21357,14 @@
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI 21)
- (reg:DI 22)
- (reg:DI 23)
- (reg:DI 24)
- (reg:DI 25)
- (reg:DI 26)
- (reg:DI 27)
- (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
(use (label_ref:DI (match_operand 3 "" "")))])]
@@ -22042,14 +21374,14 @@
(define_insn "*sse_prologue_save_insn"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 4 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI 21)
- (reg:DI 22)
- (reg:DI 23)
- (reg:DI 24)
- (reg:DI 25)
- (reg:DI 26)
- (reg:DI 27)
- (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))]
@@ -22526,6 +21858,120 @@
[(set_attr "type" "other")
(set_attr "length" "3")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; LWP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "lwp_llwpcbhi1"
+ [(unspec [(match_operand:HI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_llwpcbsi1"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_llwpcbdi1"
+ [(unspec [(match_operand:DI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_slwpcbhi1"
+ [(unspec [(match_operand:HI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_slwpcbsi1"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_slwpcbdi1"
+ [(unspec [(match_operand:DI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_lwpvalhi3"
+ [(unspec_volatile [(match_operand:HI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:HI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_lwpvalsi3"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_lwpvaldi3"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_lwpinshi3"
+ [(unspec_volatile [(match_operand:HI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:HI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_lwpinssi3"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_lwpinsdi3"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9668ff6504d..dd47b7d1dc5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -314,6 +314,14 @@ mfma4
Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save
Support FMA4 built-in functions and code generation
+mxop
+Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save
+Support XOP built-in functions and code generation
+
+mlwp
+Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save
+Support LWP built-in functions and code generation
+
mabm
Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
Support code generation of Advanced Bit Manipulation (ABM) instructions.
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index e701b19e2a8..540bc3f09ee 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -49,6 +49,7 @@ __bswapd (int __X)
return __builtin_bswap32 (__X);
}
+#ifdef __SSE4_2__
/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -70,6 +71,7 @@ __crc32d (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
+#endif /* SSE4.2 */
/* 32bit popcnt */
extern __inline int
diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h
index 9b51496a864..5d8e5ad2cbe 100644
--- a/gcc/config/i386/linux.h
+++ b/gcc/config/i386/linux.h
@@ -104,7 +104,7 @@ along with GCC; see the file COPYING3. If not see
#undef ASM_SPEC
#define ASM_SPEC \
- "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*} \
+ "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*} --32 \
%{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
#undef SUBTARGET_EXTRA_SPECS
@@ -207,6 +207,9 @@ along with GCC; see the file COPYING3. If not see
#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index cfa3f49e870..d07547a804f 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -110,6 +110,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
new file mode 100644
index 00000000000..e5137ec24f4
--- /dev/null
+++ b/gcc/config/i386/lwpintrin.h
@@ -0,0 +1,109 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _LWPINTRIN_H_INCLUDED
+#define _LWPINTRIN_H_INCLUDED
+
+#ifndef __LWP__
+# error "LWP instruction set not enabled"
+#else
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb16 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb16 (pcbAddress);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb32 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb32 (pcbAddress);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb64 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb64 (pcbAddress);
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb16 (void)
+{
+ return __builtin_ia32_slwpcb16 ();
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb32 (void)
+{
+ return __builtin_ia32_slwpcb32 ();
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb64 (void)
+{
+ return __builtin_ia32_slwpcb64 ();
+}
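+
+/* Usage sketch (hypothetical, for illustration only; the 64-byte
+ alignment is an assumption, not taken from this patch):
+
+ static char __cb[128] __attribute__ ((__aligned__ (64)));
+ __llwpcb32 (__cb); // make __cb the active LWP control block
+ void *__p = __slwpcb32 (); // __p now equals __cb
+*/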
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval16 (unsigned short data2, unsigned int data1, unsigned short flags)
+{
+ __builtin_ia32_lwpval16 (data2, data1, flags);
+}
+/*
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval32 (data2, data1, flags);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval64 (unsigned __int64 data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval64 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins16 (unsigned short data2, unsigned int data1, unsigned short flags)
+{
+ return __builtin_ia32_lwpins16 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins32 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins64 (unsigned __int64 data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins64 (data2, data1, flags);
+}
+*/
+#endif /* __LWP__ */
+
+#endif /* _LWPINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/mingw.opt b/gcc/config/i386/mingw.opt
index 6be904e968a..bd9a4b63035 100644
--- a/gcc/config/i386/mingw.opt
+++ b/gcc/config/i386/mingw.opt
@@ -21,3 +21,7 @@
Wpedantic-ms-format
C ObjC C++ ObjC++ Var(warn_pedantic_ms_format) Init(1) Warning
Warn about non-ISO msvcrt scanf/printf width extensions
+
+fset-stack-executable
+Common Report Var(flag_setstackexecutable) Init(1) Optimization
+Set executable permission on the stack for nested functions.
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index 9dcc5ba1f67..4f8b101a7ba 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -202,6 +202,8 @@ __enable_execute_stack (void *addr) \
#undef ENABLE_EXECUTE_STACK
#define ENABLE_EXECUTE_STACK MINGW_ENABLE_EXECUTE_STACK
+#undef CHECK_EXECUTE_STACK_ENABLED
+#define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable
#ifdef IN_LIBGCC2
#include <windows.h>
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index f9a4744d1de..dee6df9fa01 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -561,7 +561,9 @@
;; Test for a valid operand for a call instruction.
(define_predicate "call_insn_operand"
(ior (match_operand 0 "constant_call_address_operand")
- (ior (match_operand 0 "register_no_elim_operand")
+ (ior (and (match_operand 0 "register_no_elim_operand")
+ (ior (match_test "TARGET_CALL_ESP")
+ (match_operand 0 "index_register_operand")))
(match_operand 0 "memory_operand"))))
;; Similarly, but for tail calls, in which we cannot allow memory references.
diff --git a/gcc/config/i386/sol2-unwind.h b/gcc/config/i386/sol2-unwind.h
new file mode 100644
index 00000000000..41ffb03b6cd
--- /dev/null
+++ b/gcc/config/i386/sol2-unwind.h
@@ -0,0 +1,208 @@
+/* DWARF2 EH unwinding support for AMD x86-64 and x86.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <ucontext.h>
+
+#ifdef __x86_64__
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_64_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ mcontext_t *mctx;
+ long new_cfa;
+
+ if (/* Solaris 2.10
+ ------------
+ <__sighndlr+0>: push %rbp
+ <__sighndlr+1>: mov %rsp,%rbp
+ <__sighndlr+4>: callq *%rcx
+ <__sighndlr+6>: leaveq <--- PC
+ <__sighndlr+7>: retq */
+ *(unsigned long *)(pc - 6) == 0xc3c9d1ffe5894855)
+ /* We need to move up four frames (the kernel frame, the signal frame,
+ the call_user_handler frame and the __sighndlr frame). Two of them
+ have the minimum stack frame size (kernel and __sighndlr frames),
+ the signal frame has a stack frame size of 32 and there is another
+ with a stack frame size of 112 bytes (the call_user_handler frame).
+ The ucontext_t structure is after this offset. */
+ {
+ int off = 16 + 16 + 32 + 112;
+ mctx = &((ucontext_t *) (context->cfa + off))->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = mctx->gregs[REG_RSP];
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 7;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&mctx->gregs[REG_RAX] - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&mctx->gregs[REG_RDX] - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&mctx->gregs[REG_RCX] - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&mctx->gregs[REG_RBX] - new_cfa;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&mctx->gregs[REG_RSI] - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&mctx->gregs[REG_RDI] - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&mctx->gregs[REG_RBP] - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&mctx->gregs[REG_R8] - new_cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&mctx->gregs[REG_R9] - new_cfa;
+ fs->regs.reg[10].how = REG_SAVED_OFFSET;
+ fs->regs.reg[10].loc.offset = (long)&mctx->gregs[REG_R10] - new_cfa;
+ fs->regs.reg[11].how = REG_SAVED_OFFSET;
+ fs->regs.reg[11].loc.offset = (long)&mctx->gregs[REG_R11] - new_cfa;
+ fs->regs.reg[12].how = REG_SAVED_OFFSET;
+ fs->regs.reg[12].loc.offset = (long)&mctx->gregs[REG_R12] - new_cfa;
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long)&mctx->gregs[REG_R13] - new_cfa;
+ fs->regs.reg[14].how = REG_SAVED_OFFSET;
+ fs->regs.reg[14].loc.offset = (long)&mctx->gregs[REG_R14] - new_cfa;
+ fs->regs.reg[15].how = REG_SAVED_OFFSET;
+ fs->regs.reg[15].loc.offset = (long)&mctx->gregs[REG_R15] - new_cfa;
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long)&mctx->gregs[REG_RIP] - new_cfa;
+ fs->retaddr_column = 16;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
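+
+/* Editorial check of the magic constant above: the __sighndlr bytes
+ 55 48 89 e5 ff d1 c9 c3 (push/mov/callq/leaveq/retq), read as one
+ little-endian 64-bit word starting 6 bytes before the saved PC,
+ are exactly 0xc3c9d1ffe5894855. */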
+
+#else
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ mcontext_t *mctx;
+ long new_cfa;
+
+ if (/* Solaris 2.8 - single thread
+ -------------------------
+ <sigacthandler+17>: mov 0x10(%ebp),%esi
+ <sigacthandler+20>: push %esi
+ <sigacthandler+21>: pushl 0xc(%ebp)
+ <sigacthandler+24>: mov 0x8(%ebp),%ecx
+ <sigacthandler+27>: push %ecx
+ <sigacthandler+28>: mov offset(%ebx),%eax
+ <sigacthandler+34>: call *(%eax,%ecx,4)
+ <sigacthandler+37>: add $0xc,%esp <--- PC
+ <sigacthandler+40>: push %esi ... */
+ (*(unsigned long *)(pc - 20) == 0x5610758b
+ && *(unsigned long *)(pc - 16) == 0x8b0c75ff
+ && *(unsigned long *)(pc - 12) == 0x8b51084d
+ && *(unsigned char *)(pc - 8) == 0x83
+ && *(unsigned long *)(pc - 4) == 0x8814ff00
+ && *(unsigned long *)(pc - 0) == 0x560cc483)
+
+ || /* Solaris 2.8 - multi thread
+ ---------------------------
+ <__sighndlr+0>: push %ebp
+ <__sighndlr+1>: mov %esp,%ebp
+ <__sighndlr+3>: pushl 0x10(%ebp)
+ <__sighndlr+6>: pushl 0xc(%ebp)
+ <__sighndlr+9>: pushl 0x8(%ebp)
+ <__sighndlr+12>: call *0x14(%ebp)
+ <__sighndlr+15>: leave <--- PC */
+ (*(unsigned long *)(pc - 15) == 0xffec8b55
+ && *(unsigned long *)(pc - 11) == 0x75ff1075
+ && *(unsigned long *)(pc - 7) == 0x0875ff0c
+ && *(unsigned long *)(pc - 3) == 0xc91455ff)
+
+ || /* Solaris 2.10
+ ------------
+ <__sighndlr+0>: push %ebp
+ <__sighndlr+1>: mov %esp,%ebp
+ <__sighndlr+3>: pushl 0x10(%ebp)
+ <__sighndlr+6>: pushl 0xc(%ebp)
+ <__sighndlr+9>: pushl 0x8(%ebp)
+ <__sighndlr+12>: call *0x14(%ebp)
+ <__sighndlr+15>: add $0xc,%esp <--- PC
+ <__sighndlr+18>: leave
+ <__sighndlr+19>: ret */
+ (*(unsigned long *)(pc - 15) == 0xffec8b55
+ && *(unsigned long *)(pc - 11) == 0x75ff1075
+ && *(unsigned long *)(pc - 7) == 0x0875ff0c
+ && *(unsigned long *)(pc - 3) == 0x831455ff
+ && *(unsigned long *)(pc + 1) == 0xc3c90cc4))
+ {
+ struct handler_args {
+ int signo;
+ siginfo_t *sip;
+ ucontext_t *ucontext;
+ } *handler_args = context->cfa;
+ mctx = &handler_args->ucontext->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = mctx->gregs[UESP];
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 4;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&mctx->gregs[EAX] - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&mctx->gregs[EBX] - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&mctx->gregs[ECX] - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&mctx->gregs[EDX] - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&mctx->gregs[ESI] - new_cfa;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&mctx->gregs[EDI] - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&mctx->gregs[EBP] - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&mctx->gregs[EIP] - new_cfa;
+ fs->retaddr_column = 8;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#endif
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index 4c2dfe975cf..f062280fd18 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -118,3 +118,5 @@ along with GCC; see the file COPYING3. If not see
#ifndef TARGET_GNU_LD
#define USE_HIDDEN_LINKONCE 0
#endif
+
+#define MD_UNWIND_SUPPORT "config/i386/sol2-unwind.h"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e90296512ad..bad39bb69c8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -86,6 +86,9 @@
(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
+;; Mapping of vector modes to the maximum rotate count for the XOP rotate immediate constraint
+(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
+
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
(V16QI "QI") (V8HI "HI")
@@ -1455,7 +1458,8 @@
(match_operator:SSEMODEF4 3 "sse_comparison_operator"
[(match_operand:SSEMODEF4 1 "register_operand" "0")
(match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
+ "!TARGET_XOP
+ && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
"cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -5614,7 +5618,7 @@
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
- if (TARGET_SSE4_1)
+ if (TARGET_SSE4_1 || TARGET_XOP)
ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
@@ -5643,7 +5647,7 @@
[(set (match_operand:V4SI 0 "register_operand" "")
(mult:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
- "TARGET_SSE2 && !TARGET_SSE4_1
+ "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP
&& can_create_pseudo_p ()"
"#"
"&& 1"
@@ -5705,6 +5709,42 @@
rtx t1, t2, t3, t4, t5, t6, thirtytwo;
rtx op0, op1, op2;
+ if (TARGET_XOP)
+ {
+ /* op1: A,B,C,D, op2: E,F,G,H */
+ op0 = operands[0];
+ op1 = gen_lowpart (V4SImode, operands[1]);
+ op2 = gen_lowpart (V4SImode, operands[2]);
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+
+ /* t1: B,A,D,C */
+ emit_insn (gen_sse2_pshufd_1 (t1, op1,
+ GEN_INT (1),
+ GEN_INT (0),
+ GEN_INT (3),
+ GEN_INT (2)));
+
+ /* t2: 0 */
+ emit_move_insn (t2, CONST0_RTX (V4SImode));
+
+ /* t3: (B*E),(A*F),(D*G),(C*H) */
+ emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
+
+ /* t4: (B*E)+(A*F), (D*G)+(C*H) */
+ emit_insn (gen_xop_phadddq (t4, t3));
+
+ /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
+ emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
+
+ /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
+ emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5));
+ DONE;
+ }
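+
+ /* Worked check (illustrative): for 64-bit lanes X = 2^32*A + B and
+ Y = 2^32*E + F, the low 64 bits of X*Y are ((B*E + A*F) << 32) + B*F;
+ e.g. A,B,E,F = 2,3,5,7 gives (29 << 32) + 21, matching the pmacsdd /
+ phadddq / shift / pmacsdql sequence above. */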
+
op0 = operands[0];
op1 = operands[1];
op2 = operands[2];
@@ -5820,6 +5860,56 @@
DONE;
})
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ DONE;
+})
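+
+;; Illustration (assumed semantics of the helper insns): pshufd with
+;; order (0,2,1,3) turns {x0,x1,x2,x3} into {x0,x2,x1,x3}; viewed as
+;; two 64-bit lanes, the high 32-bit halves are then x2 and x3, so
+;; xop_mulv2div2di3_high yields the widened products of the high-half
+;; elements, and the _low variant (below) those of x0 and x1.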
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ DONE;
+})
+
(define_expand "vec_widen_umult_hi_v4si"
[(match_operand:V2DI 0 "register_operand" "")
(match_operand:V4SI 1 "register_operand" "")
@@ -6217,7 +6307,7 @@
(eq:SSEMODE124
(match_operand:SSEMODE124 1 "nonimmediate_operand" "")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_XOP "
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*avx_eq<mode>3"
@@ -6240,7 +6330,7 @@
(eq:SSEMODE124
(match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2
+ "TARGET_SSE2 && !TARGET_XOP
&& ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
@@ -6286,7 +6376,7 @@
(gt:SSEMODE124
(match_operand:SSEMODE124 1 "register_operand" "0")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_XOP"
"pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "prefix_data16" "1")
@@ -10364,6 +10454,1445 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; XOP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; XOP parallel integer multiply/add instructions.
+;; Note the instruction does not allow the value being added to be a
+;; memory operand. However, by pretending via the nonimmediate_operand
+;; predicate that it does, and splitting the insn later, the following
+;; can be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "xop_pmacsww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (plus:V8HI
+ (mult:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
+ (match_operand:V8HI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsww with two memory operands into a load and the pmacsww.
+(define_split
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" ""))
+ (match_operand:V8HI 3 "nonimmediate_operand" "")))]
+ "TARGET_XOP
+ && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
+ emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
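+
+;; Illustration (editorial, semantics of the helper assumed): if b[i],
+;; c[i] and d[i] above are all memory references, the split first loads
+;; the extra memory operand into a register via
+;; ix86_expand_fma4_multiple_memory and then emits vpmacsww, so the
+;; combined multiply-add is still recognized.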
+
+(define_insn "xop_pmacssww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (ss_plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V8HI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Note the instruction does not allow the value being added to be a
+;; memory operand. However, by pretending via the nonimmediate_operand
+;; predicate that it does, and splitting the insn later, the following
+;; can be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "xop_pmacsdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsdd with two memory operands into a load and the pmacsdd.
+(define_split
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:V4SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4SI 3 "nonimmediate_operand" "")))]
+ "TARGET_XOP
+ && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
+ emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
+(define_insn "xop_pmacssdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*xop_pmacsdql_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
+;; fake it with a multiply/add.  In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case
+;; where the target is the same as either operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_low"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
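+
+;; The zero accumulator relies on the identity x * y == x * y + 0, so the
+;; multiply-add instruction degenerates to a plain widening multiply.  Per
+;; element, the "low" form computes roughly (an illustrative sketch):
+;;   dst[0] = (long long) src1[1] * (long long) src2[1];
+;;   dst[1] = (long long) src1[3] * (long long) src2[3];
+;; i.e. the odd-numbered 32-bit elements selected by the vec_selects above.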
+
+(define_insn "xop_pmacsdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*xop_pmacsdqh_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
+;; fake it with a multiply/add. In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case where
+;; the target is the same as either operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_high"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; XOP parallel integer multiply/add instructions for the intrinsics
+(define_insn "xop_pmacsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
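+
+;; Per 32-bit lane, vpmadcswd computes roughly (an illustrative sketch, not
+;; the vendor pseudo-code):
+;;   dst[i] = src1[2*i] * src2[2*i] + src1[2*i+1] * src2[2*i+1] + src3[i];
+;; i.e. a 16x16->32 dot product of adjacent element pairs plus an
+;; accumulator; vpmadcsswd is the same with signed saturation on the adds.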
+
+;; XOP parallel XMM conditional moves
+(define_insn "xop_pcmov_<mode>"
+ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
+ (if_then_else:SSEMODE
+ (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,xm")
+ (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,x")
+ (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "@
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
+(define_insn "xop_pcmov_<mode>256"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
+ (if_then_else:AVX256MODE
+ (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,xm")
+ (match_operand:AVX256MODE 1 "vector_move_operand" "x,xm,x")
+ (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "@
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
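+
+;; vpcmov is a full bitwise select, which is why it can be described as an
+;; if_then_else on operand 3.  Per bit (a hedged sketch, not the vendor
+;; pseudo-code):
+;;   dst = (src1 & selector) | (src2 & ~selector);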
+
+;; XOP horizontal add/subtract instructions
+(define_insn "xop_phaddbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddbd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddbq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphaddwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphaddwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadddq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphadddq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddubw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddubd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddubq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphadduwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphadduwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddudq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphaddudq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (minus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphsubbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (minus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphsubwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (minus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphsubdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+;; XOP permute instructions
+(define_insn "xop_pperm"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
+ UNSPEC_XOP_PERMUTE))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; XOP pack instructions that combine two vectors into a smaller vector
+(define_insn "xop_pperm_pack_v2di_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (vec_concat:V4SI
+ (truncate:V2SI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V2SI
+ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v4si_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v8hi_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+ (vec_concat:V16QI
+ (truncate:V8QI
+ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
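+
+;; A sketch of the pack trick: operand 3 is a byte selector that makes
+;; vpperm gather the low half of every element of both inputs, e.g. for the
+;; v4si -> v8hi case roughly
+;;   r[i]     = (short) a[i];   /* low 16 bits of each 32-bit element */
+;;   r[i + 4] = (short) b[i];
+;; the selector constant is built by whatever code emits these patterns.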
+
+;; XOP packed rotate instructions
+(define_expand "rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+ }
+})
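+
+;; Usage sketch: a variable per-element rotate such as
+;;   v = (v << n) | (v >> (32 - n));   /* n in a scalar register */
+;; reaches this expander with a scalar operand 2; the code above broadcasts
+;; the count into a vector and forwards to xop_vrotl<mode>3, while constant
+;; counts fall through to the immediate-form xop_rotl<mode>3 below.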
+
+(define_expand "rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_neg<mode>2 (neg, reg));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
+})
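+
+;; Right rotates reuse the left-rotate hardware via negation, i.e. roughly
+;;   rotr (x, n) == rotl (x, -n)
+;; since vprot treats a negative per-element count as a right rotate.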
+
+(define_insn "xop_rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+{
+ operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ return "vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vrotr<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (reg, operands[2]));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "vrotl<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "xop_vrotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (rotatert:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+;; XOP packed shift instructions.
+;; FIXME: add V2DI back in
+(define_expand "vlshr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashl<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
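+
+;; The variable shifts use the same negation scheme as the rotates: vpshl
+;; and vpsha shift left for positive per-element counts and right for
+;; negative ones, so a right shift is emitted as (a sketch)
+;;   neg = -count;                    /* negated per element */
+;;   dst = vpshl/vpsha (src, neg);    /* logical/arithmetic right shift */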
+
+(define_insn "xop_ashl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (ashiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_lshl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (lshiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+;; SSE2 doesn't have some shift variants, so define versions for XOP
+(define_expand "ashlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "lshlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "ashrv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ rtx ele = ((CONST_INT_P (operands[2]))
+ ? GEN_INT (- INTVAL (operands[2]))
+ : operands[2]);
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = ele;
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+
+ DONE;
+})
+
+(define_expand "ashrv2di3"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (2);
+ rtx par = gen_rtx_PARALLEL (V2DImode, vs);
+ rtx reg = gen_reg_rtx (V2DImode);
+ rtx ele;
+
+ if (CONST_INT_P (operands[2]))
+ ele = GEN_INT (- INTVAL (operands[2]));
+ else if (GET_MODE (operands[2]) != DImode)
+ {
+ rtx move = gen_reg_rtx (DImode);
+ ele = gen_reg_rtx (DImode);
+ convert_move (move, operands[2], false);
+ emit_insn (gen_negdi2 (ele, move));
+ }
+ else
+ {
+ ele = gen_reg_rtx (DImode);
+ emit_insn (gen_negdi2 (ele, operands[2]));
+ }
+
+ RTVEC_ELT (vs, 0) = ele;
+ RTVEC_ELT (vs, 1) = ele;
+ emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
+ DONE;
+})
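+
+;; Sketch of the constant case: an arithmetic right shift such as
+;;   x >>= 3;   /* per 64-bit element */
+;; broadcasts -3 into a V2DI register and becomes a single vpshaq; variable
+;; counts are first widened to DImode and negated as above.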
+
+;; XOP FRCZ support
+;; parallel insns
+(define_insn "xop_frcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_XOP"
+ "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+;; scalar insns
+(define_insn "xop_vmfrcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_XOP"
+ "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xop_frcz<mode>2256"
+ [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
+ (unspec:FMA4MODEF4
+ [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_XOP"
+ "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xop_maskcmp<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_maskcmp_uns<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
+;; and pcomneu* not to be converted to the signed ones in case somebody needs
+;; the exact instruction generated for the intrinsic.
+(define_insn "xop_maskcmp_uns2<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
+ UNSPEC_XOP_UNSIGNED_CMP))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
+;; added here for completeness.
+(define_insn "xop_pcom_tf<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_XOP_TRUEFALSE))]
+ "TARGET_XOP"
+{
+ return ((INTVAL (operands[3]) != 0)
+ ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "*avx_aesenc"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
diff --git a/gcc/config/i386/winnt-cxx.c b/gcc/config/i386/winnt-cxx.c
index 9df7cf645bb..48518adc765 100644
--- a/gcc/config/i386/winnt-cxx.c
+++ b/gcc/config/i386/winnt-cxx.c
@@ -1,7 +1,6 @@
/* Target support for C++ classes on Windows.
Contributed by Danny Smith (dannysmith@users.sourceforge.net)
- Copyright (C) 2005, 2007
- Free Software Foundation, Inc.
+ Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
This file is part of GCC.
@@ -28,7 +27,7 @@ along with GCC; see the file COPYING3. If not see
#include "hard-reg-set.h"
#include "output.h"
#include "tree.h"
-#include "cp/cp-tree.h" /* this is why we're a separate module */
+#include "cp/cp-tree.h" /* This is why we're a separate module. */
#include "flags.h"
#include "tm_p.h"
#include "toplev.h"
@@ -52,49 +51,44 @@ i386_pe_type_dllimport_p (tree decl)
|| DECL_TEMPLATE_INSTANTIATION (decl)
|| DECL_ARTIFICIAL (decl)))
return false;
-
-
- /* Don't mark defined functions as dllimport. This code will only be
- reached if we see a non-inline function defined out-of-class. */
- else if (TREE_CODE (decl) == FUNCTION_DECL
- && (DECL_INITIAL (decl)))
- return false;
-
- /* Don't allow definitions of static data members in dllimport class,
- If vtable data is marked as DECL_EXTERNAL, import it; otherwise just
- ignore the class attribute. */
- else if (TREE_CODE (decl) == VAR_DECL
- && TREE_STATIC (decl) && TREE_PUBLIC (decl)
- && !DECL_EXTERNAL (decl))
- {
- if (!DECL_VIRTUAL_P (decl))
- error ("definition of static data member %q+D of "
- "dllimport'd class", decl);
- return false;
- }
-
+
+ /* Overrides of the class dllimport decls by out-of-class definitions are
+ handled by tree.c:merge_dllimport_decl_attributes. */
return true;
}
-
bool
i386_pe_type_dllexport_p (tree decl)
{
- gcc_assert (TREE_CODE (decl) == VAR_DECL
- || TREE_CODE (decl) == FUNCTION_DECL);
- /* Avoid exporting compiler-generated default dtors and copy ctors.
- The only artificial methods that need to be exported are virtual
- and non-virtual thunks. */
- if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE
- && DECL_ARTIFICIAL (decl) && !DECL_THUNK_P (decl))
- return false;
- return true;
+ gcc_assert (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL);
+
+ /* Avoid exporting compiler-generated default dtors and copy ctors.
+ The only artificial methods that need to be exported are virtual
+ and non-virtual thunks. */
+ if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE
+ && DECL_ARTIFICIAL (decl) && !DECL_THUNK_P (decl))
+ return false;
+ return true;
}
static inline void maybe_add_dllimport (tree decl)
{
if (i386_pe_type_dllimport_p (decl))
- DECL_DLLIMPORT_P (decl) = 1;
+ DECL_DLLIMPORT_P (decl) = 1;
+}
+
+static inline void maybe_add_dllexport (tree decl)
+{
+ if (i386_pe_type_dllexport_p (decl))
+ {
+ tree decl_attrs = DECL_ATTRIBUTES (decl);
+ if (lookup_attribute ("dllexport", decl_attrs) != NULL_TREE)
+ /* Already done. */
+ return;
+ DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("dllexport"),
+ NULL_TREE, decl_attrs);
+ }
}
void
@@ -103,41 +97,69 @@ i386_pe_adjust_class_at_definition (tree t)
tree member;
gcc_assert (CLASS_TYPE_P (t));
+
+
+ if (lookup_attribute ("dllexport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
+ {
+ /* Check static VAR_DECL's. */
+ for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllexport (member);
+
+ /* Check FUNCTION_DECL's. */
+ for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == FUNCTION_DECL)
+ {
+ tree thunk;
+ maybe_add_dllexport (member);
+
+ /* Also add the attribute to its thunks. */
+ for (thunk = DECL_THUNKS (member); thunk;
+ thunk = TREE_CHAIN (thunk))
+ maybe_add_dllexport (thunk);
+ }
+ /* Check vtables.  */
+ for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllexport (member);
+ }
- /* We only look at dllimport. The only thing that dllexport does is
- add stuff to a '.drectiv' section at end-of-file, so no need to do
- anything for dllexport'd classes until we generate RTL. */
- if (lookup_attribute ("dllimport", TYPE_ATTRIBUTES (t)) == NULL_TREE)
- return;
-
- /* We don't actually add the attribute to the decl, just set the flag
- that signals that the address of this symbol is not a compile-time
- constant. Any subsequent out-of-class declaration of members wil
- cause the DECL_DLLIMPORT_P flag to be unset.
- (See tree.c: merge_dllimport_decl_attributes).
- That is just right since out-of class declarations can only be a
- definition. We recheck the class members at RTL generation to
- emit warnings if this has happened. Definition of static data member
- of dllimport'd class always causes an error (as per MS compiler).
- */
-
- /* Check static VAR_DECL's. */
- for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
- if (TREE_CODE (member) == VAR_DECL)
- maybe_add_dllimport (member);
+ else if (lookup_attribute ("dllimport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
+ {
+ /* We don't actually add the attribute to the decl, just set the flag
+ that signals that the address of this symbol is not a compile-time
+ constant. Any subsequent out-of-class declaration of members will
+ cause the DECL_DLLIMPORT_P flag to be unset.
+ (See tree.c: merge_dllimport_decl_attributes).
+ That is just right since out-of-class declarations can only be a
+ definition. */
+
+ /* Check static VAR_DECL's. */
+ for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
- /* Check FUNCTION_DECL's. */
- for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
- if (TREE_CODE (member) == FUNCTION_DECL)
- maybe_add_dllimport (member);
+ /* Check FUNCTION_DECL's. */
+ for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == FUNCTION_DECL)
+ {
+ tree thunk;
+ maybe_add_dllimport (member);
+
+ /* Also add the attribute to its thunks. */
+ for (thunk = DECL_THUNKS (member); thunk;
+ thunk = TREE_CHAIN (thunk))
+ maybe_add_dllimport (thunk);
+ }
- /* Check vtables */
- for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
- if (TREE_CODE (member) == VAR_DECL)
- maybe_add_dllimport (member);
-
-/* We leave typeinfo tables alone. We can't mark TI objects as
- dllimport, since the address of a secondary VTT may be needed
- for static initialization of a primary VTT. VTT's of
- dllimport'd classes should always be link-once COMDAT. */
+ /* Check vtables.  */
+ for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
+
+ /* We leave typeinfo tables alone. We can't mark TI objects as
+ dllimport, since the address of a secondary VTT may be needed
+ for static initialization of a primary VTT. VTT's of
+ dllimport'd classes should always be link-once COMDAT. */
+ }
}
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index 7069c40846f..f8dcaa9673a 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -102,8 +102,6 @@ associated_type (tree decl)
static bool
i386_pe_determine_dllexport_p (tree decl)
{
- tree assoc;
-
if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
return false;
@@ -114,11 +112,6 @@ i386_pe_determine_dllexport_p (tree decl)
if (lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)))
return true;
- /* Also mark class members of exported classes with dllexport. */
- assoc = associated_type (decl);
- if (assoc && lookup_attribute ("dllexport", TYPE_ATTRIBUTES (assoc)))
- return i386_pe_type_dllexport_p (decl);
-
return false;
}
@@ -132,18 +125,23 @@ i386_pe_determine_dllimport_p (tree decl)
if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
return false;
- /* Lookup the attribute in addition to checking the DECL_DLLIMPORT_P flag.
- We may need to override an earlier decision. */
if (DECL_DLLIMPORT_P (decl))
return true;
/* The DECL_DLLIMPORT_P flag was set for decls in the class definition
by targetm.cxx.adjust_class_at_definition. Check again to emit
- warnings if the class attribute has been overridden by an
- out-of-class definition. */
+ an error message if the class attribute has been overridden by an
+ out-of-class definition of static data. */
assoc = associated_type (decl);
- if (assoc && lookup_attribute ("dllimport", TYPE_ATTRIBUTES (assoc)))
- return i386_pe_type_dllimport_p (decl);
+ if (assoc && lookup_attribute ("dllimport", TYPE_ATTRIBUTES (assoc))
+ && TREE_CODE (decl) == VAR_DECL
+ && TREE_STATIC (decl) && TREE_PUBLIC (decl)
+ && !DECL_EXTERNAL (decl)
+ /* vtables are linkonce constants, so defining a vtable is not
+ an error as long as we don't try to import it too. */
+ && !DECL_VIRTUAL_P (decl))
+ error ("definition of static data member %q+D of "
+ "dllimport'd class", decl);
return false;
}
@@ -308,17 +306,8 @@ i386_pe_encode_section_info (tree decl, rtx rtl, int first)
if (i386_pe_determine_dllexport_p (decl))
flags |= SYMBOL_FLAG_DLLEXPORT;
else if (i386_pe_determine_dllimport_p (decl))
- {
- flags |= SYMBOL_FLAG_DLLIMPORT;
- /* If we went through the associated_type path, this won't already
- be set. Though, frankly, this seems wrong, and should be fixed
- elsewhere. */
- if (!DECL_DLLIMPORT_P (decl))
- {
- DECL_DLLIMPORT_P (decl) = 1;
- flags &= ~SYMBOL_FLAG_LOCAL;
- }
- }
+ flags |= SYMBOL_FLAG_DLLIMPORT;
+
SYMBOL_REF_FLAGS (symbol) = flags;
}
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 7bc47f8f15d..ac7e21fd6f7 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -54,10 +54,6 @@
#include <smmintrin.h>
#endif
-#ifdef __FMA4__
-#include <fma4intrin.h>
-#endif
-
#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
#endif
@@ -69,4 +65,16 @@
#include <mm3dnow.h>
#endif
+#ifdef __FMA4__
+#include <fma4intrin.h>
+#endif
+
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+
+#ifdef __LWP__
+#include <lwpintrin.h>
+#endif
+
#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h
new file mode 100644
index 00000000000..803417a6a45
--- /dev/null
+++ b/gcc/config/i386/xopintrin.h
@@ -0,0 +1,771 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _XOPMMINTRIN_H_INCLUDED
+#define _XOPMMINTRIN_H_INCLUDED
+
+#ifndef __XOP__
+# error "XOP instruction set not enabled"
+#else
+
+#include <fma4intrin.h>
+
+/* Integer multiply/add instructions.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
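+
+/* Usage sketch (illustrative only, not part of the API above): with -mxop,
+
+     __m128i r = _mm_macc_epi16 (b, c, d);
+
+   computes r[i] = b[i] * c[i] + d[i] for eight 16-bit lanes with a single
+   vpmacsww; the _mm_maccs_ forms saturate instead of wrapping.  */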
+
+/* Packed Integer Horizontal Add and Subtract */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
+}
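+
+/* Sketch of the horizontal-add semantics: _mm_haddw_epi8 widens and sums
+   adjacent byte pairs, roughly (illustrative pseudo-C)
+
+     r[i] = (short) a[2*i] + (short) a[2*i + 1];   /* i = 0..7 */
+
+   and the _epu8/_epu16/_epu32 variants zero-extend instead.  */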
+
+/* Vector conditional move and permute */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
+/* Packed Integer Rotates and Shifts
+ Rotates - Non-Immediate form */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Rotates - Immediate form */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi8(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi16(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi32(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi64(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
+}
+#else
+#define _mm_roti_epi8(A, N) \
+ ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+ ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+ ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+ ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
+#endif
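+
+/* Usage note (illustrative, not part of the API): _mm_roti_epi32 rotates
+   each doubleword left by a compile-time constant, e.g.
+
+     __m128i v = _mm_set1_epi32 (0x80000001);
+     __m128i r = _mm_roti_epi32 (v, 5);    /- each lane is now 0x00000030 -/
+
+   The count must be a literal constant so that it can be encoded in the
+   VPROT* immediate byte; the macro forms above keep that working when
+   __OPTIMIZE__ is not defined and the inline functions are not folded. */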
+
+/* Shifts */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
+}
+
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Compare and Predicate Generation
+ pcom (integer, unsigned bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, unsigned words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, unsigned double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, unsigned quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
+}
+
+/* pcom (integer, signed bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, signed words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, signed double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, signed quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
+}
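+
+/* Illustrative note: each _mm_com* intrinsic above yields a per-element
+   mask, all-ones where the predicate holds and all-zeros elsewhere, so it
+   can feed _mm_cmov_si128 directly.  For example (values hypothetical):
+
+     __m128i lt  = _mm_comlt_epu8 (a, b);      /- 0xff where a < b        -/
+     __m128i min = _mm_cmov_si128 (a, b, lt);  /- unsigned byte minimum   -/
+   */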
+
+/* FRCZ */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
+}
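+
+/* Illustrative note: the VFRCZ family extracts the fractional portion of
+   each element; e.g. _mm_frcz_ps on {3.75f, ...} yields {0.75f, ...}. */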
+
+#endif /* __XOP__ */
+
+#endif /* _XOPMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 75c8f0ee6c4..8460475bddb 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -303,6 +303,7 @@ static enum machine_mode ia64_promote_function_mode (const_tree,
const_tree,
int);
static void ia64_trampoline_init (rtx, tree, rtx);
+static void ia64_override_options_after_change (void);
/* Table of valid machine attributes. */
static const struct attribute_spec ia64_attribute_table[] =
@@ -536,6 +537,9 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
+
struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
@@ -5496,6 +5500,33 @@ ia64_override_options (void)
if (TARGET_AUTO_PIC)
target_flags |= MASK_CONST_GP;
+ /* Numerous experiments show that IRA-based loop pressure
+ calculation works better for RTL loop invariant motion on targets
+ with enough (>= 32) registers.  It is an expensive optimization,
+ so it is enabled only for peak performance. */
+ if (optimize >= 3)
+ flag_ira_loop_pressure = 1;
+
+ ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
+
+ init_machine_status = ia64_init_machine_status;
+
+ if (align_functions <= 0)
+ align_functions = 64;
+ if (align_loops <= 0)
+ align_loops = 32;
+ if (TARGET_ABI_OPEN_VMS)
+ flag_no_common = 1;
+
+ ia64_override_options_after_change ();
+}
+
+/* Implement targetm.override_options_after_change. */
+
+static void
+ia64_override_options_after_change (void)
+{
ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
flag_schedule_insns_after_reload = 0;
@@ -5517,18 +5548,6 @@ ia64_override_options (void)
a transformation. */
flag_auto_inc_dec = 0;
}
-
- ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
-
- init_machine_status = ia64_init_machine_status;
-
- if (align_functions <= 0)
- align_functions = 64;
- if (align_loops <= 0)
- align_loops = 32;
-
- if (TARGET_ABI_OPEN_VMS)
- flag_no_common = 1;
}
/* Initialize the record of emitted frame related registers. */
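The ia64.c change above is an instance of a general refactoring: settings that depend on the optimization level move out of the one-shot override_options hook into an after-change hook, which override_options itself calls once and which can be re-run whenever the level changes (for example for a per-function `optimize' attribute). A minimal sketch of the split, with hypothetical foo_* names and only the hook name taken from the patch:

    /* Re-run whenever the optimization level may have changed.  */
    static void
    foo_override_options_after_change (void)
    {
      /* Derive flags from `optimize', `optimize_size', etc. here;
         nothing in this function may be once-only.  */
    }

    /* Run once at startup.  */
    static void
    foo_override_options (void)
    {
      /* One-time initialization that must not be repeated.  */

      /* Apply the level-dependent settings for the initial level.  */
      foo_override_options_after_change ();
    }

    #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
    #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE foo_override_options_after_change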
diff --git a/gcc/config/m32c/m32c-protos.h b/gcc/config/m32c/m32c-protos.h
index e571fe9d25e..42b92feb506 100644
--- a/gcc/config/m32c/m32c-protos.h
+++ b/gcc/config/m32c/m32c-protos.h
@@ -49,7 +49,6 @@ int m32c_trampoline_size (void);
#if defined(RTX_CODE) && defined(TREE_CODE)
rtx m32c_function_arg (CUMULATIVE_ARGS *, MM, tree, int);
-rtx m32c_function_value (const_tree, const_tree);
#endif
@@ -75,7 +74,7 @@ bool m32c_immd_dbl_mov (rtx *, MM);
rtx m32c_incoming_return_addr_rtx (void);
int m32c_legitimate_constant_p (rtx);
int m32c_legitimize_reload_address (rtx *, MM, int, int, int);
-rtx m32c_libcall_value (MM);
+bool m32c_function_value_regno_p (const unsigned int);
int m32c_limit_reload_class (MM, int);
int m32c_memory_move_cost (MM, int, int);
int m32c_modes_tieable_p (MM, MM);
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c
index 4eeedb183e7..1085aa7c25a 100644
--- a/gcc/config/m32c/m32c.c
+++ b/gcc/config/m32c/m32c.c
@@ -81,6 +81,9 @@ static bool m32c_strict_argument_naming (CUMULATIVE_ARGS *);
static rtx m32c_struct_value_rtx (tree, int);
static rtx m32c_subreg (enum machine_mode, rtx, enum machine_mode, int);
static int need_to_save (int);
+static rtx m32c_function_value (const_tree, const_tree, bool);
+static rtx m32c_libcall_value (enum machine_mode, const_rtx);
+
int current_function_special_page_vector (rtx);
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
@@ -1591,15 +1594,19 @@ m32c_valid_pointer_mode (enum machine_mode mode)
/* How Scalar Function Values Are Returned */
-/* Implements LIBCALL_VALUE. Most values are returned in $r0, or some
+/* Implements TARGET_LIBCALL_VALUE. Most values are returned in $r0, or some
combination of registers starting there (r2r0 for longs, r3r1r2r0
for long long, r3r2r1r0 for doubles), except that that ABI
currently doesn't work because it ends up using all available
general registers and gcc often can't compile it. So, instead, we
return anything bigger than 16 bits in "mem0" (effectively, a
memory location). */
-rtx
-m32c_libcall_value (enum machine_mode mode)
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE m32c_libcall_value
+
+static rtx
+m32c_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
/* return reg or parallel */
#if 0
@@ -1649,14 +1656,28 @@ m32c_libcall_value (enum machine_mode mode)
return gen_rtx_REG (mode, R0_REGNO);
}
-/* Implements FUNCTION_VALUE. Functions and libcalls have the same
+/* Implements TARGET_FUNCTION_VALUE. Functions and libcalls have the same
conventions. */
-rtx
-m32c_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE m32c_function_value
+
+static rtx
+m32c_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
{
/* return reg or parallel */
const enum machine_mode mode = TYPE_MODE (valtype);
- return m32c_libcall_value (mode);
+ return m32c_libcall_value (mode, NULL_RTX);
+}
+
+/* Implements FUNCTION_VALUE_REGNO_P. */
+
+bool
+m32c_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == R0_REGNO || regno == MEM0_REGNO);
}
/* How Large Values Are Returned */
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
index 0f12158c0e4..78e3115c291 100644
--- a/gcc/config/m32c/m32c.h
+++ b/gcc/config/m32c/m32c.h
@@ -533,10 +533,7 @@ typedef struct m32c_cumulative_args
/* How Scalar Function Values Are Returned */
-#define FUNCTION_VALUE(VT,F) m32c_function_value (VT, F)
-#define LIBCALL_VALUE(MODE) m32c_libcall_value (MODE)
-
-#define FUNCTION_VALUE_REGNO_P(r) ((r) == R0_REGNO || (r) == MEM0_REGNO)
+#define FUNCTION_VALUE_REGNO_P(r) m32c_function_value_regno_p (r)
/* How Large Values Are Returned */
@@ -563,7 +560,6 @@ typedef struct m32c_cumulative_args
#define HAVE_PRE_DECREMENT 1
#define HAVE_POST_INCREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
#define MAX_REGS_PER_ADDRESS 1
/* This is passed to the macros below, so that they can be implemented
diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h
index ee0f9f67fca..278ba15c4fa 100644
--- a/gcc/config/m68hc11/m68hc11.h
+++ b/gcc/config/m68hc11/m68hc11.h
@@ -1108,9 +1108,6 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER];
&& (GET_CODE (XEXP (operand, 0)) == POST_INC) \
&& (SP_REG_P (XEXP (XEXP (operand, 0), 0))))
-/* 1 if X is an rtx for a constant that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X))
-
/* Maximum number of registers that can appear in a valid memory address */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
index 0862936b1b4..8db98fc4f46 100644
--- a/gcc/config/m68k/m68k.c
+++ b/gcc/config/m68k/m68k.c
@@ -1399,6 +1399,30 @@ flags_in_68881 (void)
return cc_status.flags & CC_IN_68881;
}
+/* Return true if PARALLEL contains register REGNO. */
+static bool
+m68k_reg_present_p (const_rtx parallel, unsigned int regno)
+{
+ int i;
+
+ if (REG_P (parallel) && REGNO (parallel) == regno)
+ return true;
+
+ if (GET_CODE (parallel) != PARALLEL)
+ return false;
+
+ for (i = 0; i < XVECLEN (parallel, 0); ++i)
+ {
+ const_rtx x;
+
+ x = XEXP (XVECEXP (parallel, 0, i), 0);
+ if (REG_P (x) && REGNO (x) == regno)
+ return true;
+ }
+
+ return false;
+}
+
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */
static bool
@@ -1411,6 +1435,26 @@ m68k_ok_for_sibcall_p (tree decl, tree exp)
if (CALL_EXPR_STATIC_CHAIN (exp))
return false;
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a D0 register but then need to transfer it to an A0 register. */
+ rtx cfun_value;
+ rtx call_value;
+
+ cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl);
+ call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl);
+
+ /* Check that the values are equal or that the result the callee
+ function returns is a superset of what the current function returns. */
+ if (!(rtx_equal_p (cfun_value, call_value)
+ || (REG_P (cfun_value)
+ && m68k_reg_present_p (call_value, REGNO (cfun_value)))))
+ return false;
+ }
+
kind = m68k_get_function_kind (current_function_decl);
if (kind == m68k_fk_normal_function)
/* We can always sibcall from a normal function, because it's
@@ -5188,6 +5232,9 @@ m68k_libcall_value (enum machine_mode mode)
return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG);
}
+/* Location in which the function value is returned.
+ NOTE: Due to differences in ABIs, don't call this function directly,
+ use FUNCTION_VALUE instead. */
rtx
m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
{
diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h
index 8b00a444ce2..9d286e33b94 100644
--- a/gcc/config/mep/mep.h
+++ b/gcc/config/mep/mep.h
@@ -567,8 +567,6 @@ typedef struct
#define TRAMPOLINE_SIZE 20
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#ifdef REG_OK_STRICT
diff --git a/gcc/config/mips/iris.h b/gcc/config/mips/iris.h
index fce82174e66..373691ee6e1 100644
--- a/gcc/config/mips/iris.h
+++ b/gcc/config/mips/iris.h
@@ -63,9 +63,6 @@ along with GCC; see the file COPYING3. If not see
#undef ASM_FINISH_DECLARE_OBJECT
#define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object
-/* The linker needs a space after "-o". */
-#define SWITCHES_NEED_SPACES "o"
-
/* Specify wchar_t types. */
#undef WCHAR_TYPE
#define WCHAR_TYPE (Pmode == DImode ? "int" : "long int")
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index abcc2d421ef..716b7acad82 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -343,5 +343,7 @@ extern void mips_expand_vector_init (rtx, rtx);
extern bool mips_eh_uses (unsigned int);
extern bool mips_epilogue_uses (unsigned int);
extern void mips_final_prescan_insn (rtx, rtx *, int);
+extern int mips_trampoline_code_size (void);
+extern void mips_function_profiler (FILE *);
#endif /* ! GCC_MIPS_PROTOS_H */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 1bead599411..c8c1dca25ce 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -126,6 +126,40 @@ along with GCC; see the file COPYING3. If not see
/* True if bit BIT is set in VALUE. */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)
+/* Return the opcode for a ptr_mode load of the form:
+
+ l[wd] DEST, OFFSET(BASE). */
+#define MIPS_LOAD_PTR(DEST, OFFSET, BASE) \
+ (((ptr_mode == DImode ? 0x37 : 0x23) << 26) \
+ | ((BASE) << 21) \
+ | ((DEST) << 16) \
+ | (OFFSET))
+
+/* Return the opcode to move register SRC into register DEST. */
+#define MIPS_MOVE(DEST, SRC) \
+ ((TARGET_64BIT ? 0x2d : 0x21) \
+ | ((DEST) << 11) \
+ | ((SRC) << 21))
+
+/* Return the opcode for:
+
+ lui DEST, VALUE. */
+#define MIPS_LUI(DEST, VALUE) \
+ ((0xf << 26) | ((DEST) << 16) | (VALUE))
+
+/* Return the opcode to jump to register DEST. */
+#define MIPS_JR(DEST) \
+ (((DEST) << 21) | 0x8)
+
+/* Return the opcode for:
+
+ bal . + (1 + OFFSET) * 4. */
+#define MIPS_BAL(OFFSET) \
+ ((0x1 << 26) | (0x11 << 16) | (OFFSET))
+
+/* Return the usual opcode for a nop. */
+#define MIPS_NOP 0
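+
+/* Worked example (illustrative): MIPS_LOAD_PTR (25, 16, 31) with
+   ptr_mode == SImode is (0x23 << 26) | (31 << 21) | (25 << 16) | 16
+   == 0x8ff90010, i.e. "lw $25,16($31)" -- the same word the old
+   asm trampoline template emitted.  */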
+
/* Classifies an address.
ADDRESS_REG
@@ -285,6 +319,9 @@ struct GTY(()) mips_frame_info {
HOST_WIDE_INT acc_sp_offset;
HOST_WIDE_INT cop0_sp_offset;
+ /* Similar, but the value passed to _mcount. */
+ HOST_WIDE_INT ra_fp_offset;
+
/* The offset of arg_pointer_rtx from the bottom of the frame. */
HOST_WIDE_INT arg_pointer_offset;
@@ -2410,6 +2447,28 @@ mips_emit_move (rtx dest, rtx src)
: emit_move_insn_1 (dest, src));
}
+/* Emit an instruction of the form (set TARGET (CODE OP0)). */
+
+static void
+mips_emit_unary (enum rtx_code code, rtx target, rtx op0)
+{
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_fmt_e (code, GET_MODE (op0), op0)));
+}
+
+/* Compute (CODE OP0) and store the result in a new register of mode MODE.
+ Return that new register. */
+
+static rtx
+mips_force_unary (enum machine_mode mode, enum rtx_code code, rtx op0)
+{
+ rtx reg;
+
+ reg = gen_reg_rtx (mode);
+ mips_emit_unary (code, reg, op0);
+ return reg;
+}
+
/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */
static void
@@ -3353,10 +3412,11 @@ mips_immediate_operand_p (int code, HOST_WIDE_INT x)
/* Return the cost of binary operation X, given that the instruction
sequence for a word-sized or smaller operation has cost SINGLE_COST
- and that the sequence of a double-word operation has cost DOUBLE_COST. */
+ and that the sequence of a double-word operation has cost DOUBLE_COST.
+ If SPEED is true, optimize for speed; otherwise, optimize for size. */
static int
-mips_binary_cost (rtx x, int single_cost, int double_cost)
+mips_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
{
int cost;
@@ -3365,8 +3425,8 @@ mips_binary_cost (rtx x, int single_cost, int double_cost)
else
cost = single_cost;
return (cost
- + rtx_cost (XEXP (x, 0), SET, !optimize_size)
- + rtx_cost (XEXP (x, 1), GET_CODE (x), !optimize_size));
+ + rtx_cost (XEXP (x, 0), SET, speed)
+ + rtx_cost (XEXP (x, 1), GET_CODE (x), speed));
}
/* Return the cost of floating-point multiplications of mode MODE. */
@@ -3436,8 +3496,7 @@ mips_zero_extend_cost (enum machine_mode mode, rtx op)
/* Implement TARGET_RTX_COSTS. */
static bool
-mips_rtx_costs (rtx x, int code, int outer_code, int *total,
- bool speed)
+mips_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
@@ -3493,8 +3552,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
operand needs to be forced into a register, we will often be
able to hoist the constant load out of the loop, so the load
should not contribute to the cost. */
- if (!optimize_size
- || mips_immediate_operand_p (outer_code, INTVAL (x)))
+ if (speed || mips_immediate_operand_p (outer_code, INTVAL (x)))
{
*total = 0;
return true;
@@ -3592,7 +3650,8 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
case IOR:
case XOR:
/* Double-word operations use two single-word operations. */
- *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2));
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+ speed);
return true;
case ASHIFT:
@@ -3601,9 +3660,11 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
case ROTATE:
case ROTATERT:
if (CONSTANT_P (XEXP (x, 1)))
- *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4));
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ speed);
else
- *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12));
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12),
+ speed);
return true;
case ABS:
@@ -3639,7 +3700,8 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
*total = mips_cost->fp_add;
return false;
}
- *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4));
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ speed);
return true;
case MINUS:
@@ -3690,7 +3752,8 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
an SLTU. The MIPS16 version then needs to move the result of
the SLTU from $24 to a MIPS16 register. */
*total = mips_binary_cost (x, COSTS_N_INSNS (1),
- COSTS_N_INSNS (TARGET_MIPS16 ? 5 : 4));
+ COSTS_N_INSNS (TARGET_MIPS16 ? 5 : 4),
+ speed);
return true;
case NEG:
@@ -3726,10 +3789,10 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
else if (mode == DImode && !TARGET_64BIT)
/* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions,
where the mulsidi3 always includes an MFHI and an MFLO. */
- *total = (optimize_size
- ? COSTS_N_INSNS (ISA_HAS_MUL3 ? 7 : 9)
- : mips_cost->int_mult_si * 3 + 6);
- else if (optimize_size)
+ *total = (speed
+ ? mips_cost->int_mult_si * 3 + 6
+ : COSTS_N_INSNS (ISA_HAS_MUL3 ? 7 : 9));
+ else if (!speed)
*total = (ISA_HAS_MUL3 ? 1 : 2);
else if (mode == DImode)
*total = mips_cost->int_mult_di;
@@ -3766,7 +3829,7 @@ mips_rtx_costs (rtx x, int code, int outer_code, int *total,
case UDIV:
case UMOD:
- if (optimize_size)
+ if (!speed)
{
/* It is our responsibility to make division by a power of 2
as cheap as 2 register additions if we want the division
@@ -6243,7 +6306,7 @@ mips16_build_call_stub (rtx retval, rtx *fn_ptr, rtx args_size, int fp_code)
The stub's caller knows that $18 might be clobbered, even though
$18 is usually a call-saved register. */
fprintf (asm_out_file, "\tmove\t%s,%s\n",
- reg_names[GP_REG_FIRST + 18], reg_names[GP_REG_FIRST + 31]);
+ reg_names[GP_REG_FIRST + 18], reg_names[RETURN_ADDR_REGNUM]);
output_asm_insn (MIPS_CALL ("jal", &fn, 0, -1), &fn);
/* Move the result from floating-point registers to
@@ -6651,7 +6714,14 @@ mips_expand_block_move (rtx dest, rtx src, rtx length)
void
mips_expand_synci_loop (rtx begin, rtx end)
{
- rtx inc, label, cmp, cmp_result;
+ rtx inc, label, end_label, cmp_result, mask, length;
+
+ /* Create end_label. */
+ end_label = gen_label_rtx ();
+
+ /* Check if begin equals end. */
+ cmp_result = gen_rtx_EQ (VOIDmode, begin, end);
+ emit_jump_insn (gen_condjump (cmp_result, end_label));
/* Load INC with the cache line size (rdhwr INC,$1). */
inc = gen_reg_rtx (Pmode);
@@ -6659,18 +6729,36 @@ mips_expand_synci_loop (rtx begin, rtx end)
? gen_rdhwr_synci_step_si (inc)
: gen_rdhwr_synci_step_di (inc));
+ /* Check if inc is 0. */
+ cmp_result = gen_rtx_EQ (VOIDmode, inc, const0_rtx);
+ emit_jump_insn (gen_condjump (cmp_result, end_label));
+
+ /* Calculate mask. */
+ mask = mips_force_unary (Pmode, NEG, inc);
+
+ /* Mask out begin by mask. */
+ begin = mips_force_binary (Pmode, AND, begin, mask);
+
+ /* Calculate length. */
+ length = mips_force_binary (Pmode, MINUS, end, begin);
+
/* Loop back to here. */
label = gen_label_rtx ();
emit_label (label);
emit_insn (gen_synci (begin));
- cmp = mips_force_binary (Pmode, GTU, begin, end);
+ /* Update length. */
+ mips_emit_binary (MINUS, length, length, inc);
+ /* Update begin. */
mips_emit_binary (PLUS, begin, begin, inc);
- cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
+ /* Check if length is greater than 0. */
+ cmp_result = gen_rtx_GT (VOIDmode, length, const0_rtx);
emit_jump_insn (gen_condjump (cmp_result, label));
+
+ emit_label (end_label);
}
/* Expand a QI or HI mode atomic memory operation.
@@ -7276,7 +7364,7 @@ mips_print_operand_punctuation (FILE *file, int ch)
break;
case '@':
- fputs (reg_names[GP_REG_FIRST + 1], file);
+ fputs (reg_names[AT_REGNUM], file);
break;
case '^':
@@ -8144,8 +8232,8 @@ mips_frame_set (rtx mem, rtx reg)
/* If we're saving the return address register and the DWARF return
address column differs from the hard register number, adjust the
note reg to refer to the former. */
- if (REGNO (reg) == GP_REG_FIRST + 31
- && DWARF_FRAME_RETURN_COLUMN != GP_REG_FIRST + 31)
+ if (REGNO (reg) == RETURN_ADDR_REGNUM
+ && DWARF_FRAME_RETURN_COLUMN != RETURN_ADDR_REGNUM)
reg = gen_rtx_REG (GET_MODE (reg), DWARF_FRAME_RETURN_COLUMN);
set = gen_rtx_SET (VOIDmode, mem, reg);
@@ -8595,8 +8683,8 @@ mips16e_output_save_restore (rtx pattern, HOST_WIDE_INT adjust)
mips16e_a0_a3_regs[end - 1]);
/* Save or restore $31. */
- if (BITSET_P (info.mask, 31))
- s += sprintf (s, ",%s", reg_names[GP_REG_FIRST + 31]);
+ if (BITSET_P (info.mask, RETURN_ADDR_REGNUM))
+ s += sprintf (s, ",%s", reg_names[RETURN_ADDR_REGNUM]);
return buffer;
}
@@ -8764,7 +8852,7 @@ mips_global_pointer (void)
return GLOBAL_POINTER_REGNUM;
}
-/* Return true if current function's prologue must load the global
+/* Return true if the current function's prologue must load the global
pointer value into pic_offset_table_rtx and store the same value in
the function's cprestore slot (if any).
@@ -8969,7 +9057,7 @@ mips_cfun_might_clobber_call_saved_reg_p (unsigned int regno)
/* If a MIPS16 function returns a value in FPRs, its epilogue
will need to call an external libgcc routine. This yet-to-be
generated call_insn will clobber $31. */
- if (regno == GP_REG_FIRST + 31 && mips16_cfun_returns_in_fpr_p ())
+ if (regno == RETURN_ADDR_REGNUM && mips16_cfun_returns_in_fpr_p ())
return true;
/* If REGNO is ordinarily call-clobbered, we must assume that any
@@ -9003,7 +9091,7 @@ mips_save_reg_p (unsigned int regno)
/* We need to save the incoming return address if __builtin_eh_return
is being used to set a different return address. */
- if (regno == GP_REG_FIRST + 31 && crtl->calls_eh_return)
+ if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
return true;
return false;
@@ -9378,7 +9466,7 @@ mips_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
if (count != 0)
return const0_rtx;
- return get_hard_reg_initial_val (Pmode, GP_REG_FIRST + 31);
+ return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
}
/* Emit code to change the current function's return address to
@@ -9390,7 +9478,7 @@ mips_set_return_address (rtx address, rtx scratch)
{
rtx slot_address;
- gcc_assert (BITSET_P (cfun->machine->frame.mask, 31));
+ gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
slot_address = mips_add_offset (scratch, stack_pointer_rtx,
cfun->machine->frame.gp_sp_offset);
mips_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
@@ -9499,7 +9587,10 @@ mips_restore_gp_from_cprestore_slot (rtx temp)
gcc_assert (TARGET_ABICALLS && TARGET_OLDABI && epilogue_completed);
if (!cfun->machine->must_restore_gp_when_clobbered_p)
- return;
+ {
+ emit_note (NOTE_INSN_DELETED);
+ return;
+ }
if (TARGET_MIPS16)
{
@@ -9578,6 +9669,9 @@ mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
+ /* Record the ra offset for use by mips_function_profiler. */
+ if (regno == RETURN_ADDR_REGNUM)
+ cfun->machine->frame.ra_fp_offset = offset + sp_offset;
mips_save_restore_reg (word_mode, regno, offset, fn);
offset -= UNITS_PER_WORD;
}
@@ -9604,7 +9698,7 @@ static bool
mips_direct_save_slot_move_p (unsigned int regno, rtx mem, bool load_p)
{
/* There is a specific MIPS16 instruction for saving $31 to the stack. */
- if (TARGET_MIPS16 && !load_p && regno == GP_REG_FIRST + 31)
+ if (TARGET_MIPS16 && !load_p && regno == RETURN_ADDR_REGNUM)
return false;
return mips_secondary_reload_class (REGNO_REG_CLASS (regno),
@@ -9741,7 +9835,7 @@ mips_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
(frame_pointer_needed
? frame->total_size - frame->hard_frame_pointer_offset
: frame->total_size),
- reg_names[GP_REG_FIRST + 31],
+ reg_names[RETURN_ADDR_REGNUM],
frame->var_size,
frame->num_gp, frame->num_fp,
frame->args_size,
@@ -10184,7 +10278,7 @@ mips_restore_reg (rtx reg, rtx mem)
{
/* There's no MIPS16 instruction to load $31 directly. Load into
$7 instead and adjust the return insn appropriately. */
- if (TARGET_MIPS16 && REGNO (reg) == GP_REG_FIRST + 31)
+ if (TARGET_MIPS16 && REGNO (reg) == RETURN_ADDR_REGNUM)
reg = gen_rtx_REG (GET_MODE (reg), GP_REG_FIRST + 7);
mips_emit_save_slot_move (reg, mem, MIPS_EPILOGUE_TEMP (GET_MODE (reg)));
@@ -10399,10 +10493,10 @@ mips_expand_epilogue (bool sibcall_p)
address into $7 rather than $31. */
if (TARGET_MIPS16
&& !GENERATE_MIPS16E_SAVE_RESTORE
- && BITSET_P (frame->mask, 31))
+ && BITSET_P (frame->mask, RETURN_ADDR_REGNUM))
regno = GP_REG_FIRST + 7;
else
- regno = GP_REG_FIRST + 31;
+ regno = RETURN_ADDR_REGNUM;
emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno)));
}
}
@@ -15830,7 +15924,7 @@ mips_epilogue_uses (unsigned int regno)
/* Say that the epilogue uses the return address register. Note that
in the case of sibcalls, the values "used by the epilogue" are
considered live at the start of the called function. */
- if (regno == 31)
+ if (regno == RETURN_ADDR_REGNUM)
return true;
/* If using a GOT, say that the epilogue also uses GOT_VERSION_REGNUM.
@@ -15889,41 +15983,21 @@ mips_final_postscan_insn (FILE *file ATTRIBUTE_UNUSED, rtx insn,
mips_pop_asm_switch (&mips_noat);
}
-/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+/* Return the size in bytes of the trampoline code, padded to
+ TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target
+ function address immediately follow. */
-static void
-mips_asm_trampoline_template (FILE *f)
-{
- if (ptr_mode == DImode)
- fprintf (f, "\t.word\t0x03e0082d\t\t# dmove $1,$31\n");
- else
- fprintf (f, "\t.word\t0x03e00821\t\t# move $1,$31\n");
- fprintf (f, "\t.word\t0x04110001\t\t# bgezal $0,.+8\n");
- fprintf (f, "\t.word\t0x00000000\t\t# nop\n");
- if (ptr_mode == DImode)
- {
- fprintf (f, "\t.word\t0xdff90014\t\t# ld $25,20($31)\n");
- fprintf (f, "\t.word\t0xdfef001c\t\t# ld $15,28($31)\n");
- }
- else
- {
- fprintf (f, "\t.word\t0x8ff90010\t\t# lw $25,16($31)\n");
- fprintf (f, "\t.word\t0x8fef0014\t\t# lw $15,20($31)\n");
- }
- fprintf (f, "\t.word\t0x03200008\t\t# jr $25\n");
- if (ptr_mode == DImode)
- {
- fprintf (f, "\t.word\t0x0020f82d\t\t# dmove $31,$1\n");
- fprintf (f, "\t.word\t0x00000000\t\t# <padding>\n");
- fprintf (f, "\t.dword\t0x00000000\t\t# <function address>\n");
- fprintf (f, "\t.dword\t0x00000000\t\t# <static chain value>\n");
- }
+int
+mips_trampoline_code_size (void)
+{
+ if (TARGET_USE_PIC_FN_ADDR_REG)
+ return 4 * 4;
+ else if (ptr_mode == DImode)
+ return 8 * 4;
+ else if (ISA_HAS_LOAD_DELAY)
+ return 6 * 4;
else
- {
- fprintf (f, "\t.word\t0x0020f821\t\t# move $31,$1\n");
- fprintf (f, "\t.word\t0x00000000\t\t# <function address>\n");
- fprintf (f, "\t.word\t0x00000000\t\t# <static chain value>\n");
- }
+ return 4 * 4;
}
/* Implement TARGET_TRAMPOLINE_INIT. */
@@ -15931,23 +16005,198 @@ mips_asm_trampoline_template (FILE *f)
static void
mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
- rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
- rtx mem, addr, end_addr;
+ rtx addr, end_addr, high, low, opcode, mem;
+ rtx trampoline[8];
+ unsigned int i, j;
+ HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;
+
+ /* Work out the offsets of the pointers from the start of the
+ trampoline code. */
+ end_addr_offset = mips_trampoline_code_size ();
+ static_chain_offset = end_addr_offset;
+ target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
- emit_block_move (m_tramp, assemble_trampoline_template (),
- GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+ /* Get pointers to the beginning and end of the code block. */
+ addr = force_reg (Pmode, XEXP (m_tramp, 0));
+ end_addr = mips_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));
+
+#define OP(X) gen_int_mode (X, SImode)
+
+ /* Build up the code in TRAMPOLINE. */
+ i = 0;
+ if (TARGET_USE_PIC_FN_ADDR_REG)
+ {
+ /* $25 contains the address of the trampoline. Emit code of the form:
+
+ l[wd] $1, target_function_offset($25)
+ l[wd] $static_chain, static_chain_offset($25)
+ jr $1
+ move $25,$1. */
+ trampoline[i++] = OP (MIPS_LOAD_PTR (AT_REGNUM,
+ target_function_offset,
+ PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset,
+ PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_JR (AT_REGNUM));
+ trampoline[i++] = OP (MIPS_MOVE (PIC_FUNCTION_ADDR_REGNUM, AT_REGNUM));
+ }
+ else if (ptr_mode == DImode)
+ {
+ /* It's too cumbersome to create the full 64-bit address, so let's
+ instead use:
+
+ move $1, $31
+ bal 1f
+ nop
+ 1: l[wd] $25, target_function_offset - 12($31)
+ l[wd] $static_chain, static_chain_offset - 12($31)
+ jr $25
+ move $31, $1
+
+ where 12 is the offset of "1:" from the start of the code block. */
+ trampoline[i++] = OP (MIPS_MOVE (AT_REGNUM, RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_BAL (1));
+ trampoline[i++] = OP (MIPS_NOP);
+ trampoline[i++] = OP (MIPS_LOAD_PTR (PIC_FUNCTION_ADDR_REGNUM,
+ target_function_offset - 12,
+ RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset - 12,
+ RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_MOVE (RETURN_ADDR_REGNUM, AT_REGNUM));
+ }
+ else
+ {
+ /* If the target has load delays, emit:
+
+ lui $1, %hi(end_addr)
+ lw $25, %lo(end_addr + ...)($1)
+ lw $static_chain, %lo(end_addr + ...)($1)
+ jr $25
+ nop
+
+ Otherwise emit:
+
+ lui $1, %hi(end_addr)
+ lw $25, %lo(end_addr + ...)($1)
+ jr $25
+ lw $static_chain, %lo(end_addr + ...)($1). */
+
+ /* Split END_ADDR into %hi and %lo values. Trampolines are aligned
+ to 64 bits, so the %lo value will have the bottom 3 bits clear. */
+ high = expand_simple_binop (SImode, PLUS, end_addr, GEN_INT (0x8000),
+ NULL, false, OPTAB_WIDEN);
+ high = expand_simple_binop (SImode, LSHIFTRT, high, GEN_INT (16),
+ NULL, false, OPTAB_WIDEN);
+ low = convert_to_mode (SImode, gen_lowpart (HImode, end_addr), true);
+
+ /* Emit the LUI. */
+ opcode = OP (MIPS_LUI (AT_REGNUM, 0));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, high,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the load of the target function. */
+ opcode = OP (MIPS_LOAD_PTR (PIC_FUNCTION_ADDR_REGNUM,
+ target_function_offset - end_addr_offset,
+ AT_REGNUM));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, low,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the JR here, if we can. */
+ if (!ISA_HAS_LOAD_DELAY)
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+
+ /* Emit the load of the static chain register. */
+ opcode = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset - end_addr_offset,
+ AT_REGNUM));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, low,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the JR, if we couldn't above. */
+ if (ISA_HAS_LOAD_DELAY)
+ {
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_NOP);
+ }
+ }
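+
+  /* Illustrative check of the %hi/%lo split (hypothetical address): for
+     end_addr == 0x12348010, high is (0x12348010 + 0x8000) >> 16 == 0x1235,
+     and the 16-bit offset 0x8010 sign-extends in hardware to -0x7ff0, so
+     "lui $1,0x1235" plus a load at offset 0x8010($1) again addresses
+     0x12348010.  */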
- mem = adjust_address (m_tramp, ptr_mode, ptr_mode == DImode ? 32 : 28);
- mips_emit_move (mem, force_reg (ptr_mode, fnaddr));
- mem = adjust_address (mem, ptr_mode, GET_MODE_SIZE (ptr_mode));
- mips_emit_move (mem, force_reg (ptr_mode, chain_value));
+#undef OP
- addr = force_reg (ptr_mode, XEXP (m_tramp, 0));
- end_addr = gen_reg_rtx (ptr_mode);
+ /* Copy the trampoline code. Leave any padding uninitialized. */
+ for (j = 0; j < i; j++)
+ {
+ mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
+ mips_emit_move (mem, trampoline[j]);
+ }
+
+ /* Set up the static chain pointer field. */
+ mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
+ mips_emit_move (mem, chain_value);
+
+ /* Set up the target function field. */
+ mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
+ mips_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
+
+ /* Flush the code part of the trampoline. */
emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
emit_insn (gen_clear_cache (addr, end_addr));
}
+/* Implement FUNCTION_PROFILER. */
+
+void
+mips_function_profiler (FILE *file)
+{
+ if (TARGET_MIPS16)
+ sorry ("mips16 function profiling");
+ if (TARGET_LONG_CALLS)
+ {
+ /* For TARGET_LONG_CALLS, use $3 for the address of _mcount. */
+ if (Pmode == DImode)
+ fprintf (file, "\tdla\t%s,_mcount\n", reg_names[3]);
+ else
+ fprintf (file, "\tla\t%s,_mcount\n", reg_names[3]);
+ }
+ mips_push_asm_switch (&mips_noat);
+ fprintf (file, "\tmove\t%s,%s\t\t# save current return address\n",
+ reg_names[AT_REGNUM], reg_names[RETURN_ADDR_REGNUM]);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[2],
+ reg_names[STATIC_CHAIN_REGNUM]);
+ if (TARGET_MCOUNT_RA_ADDRESS)
+ {
+ /* If TARGET_MCOUNT_RA_ADDRESS, load $12 with the address of the
+ ra save location. */
+ if (cfun->machine->frame.ra_fp_offset == 0)
+ /* ra not saved, pass zero. */
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[12], reg_names[0]);
+ else
+ fprintf (file, "\t%s\t%s," HOST_WIDE_INT_PRINT_DEC "(%s)\n",
+ Pmode == DImode ? "dla" : "la", reg_names[12],
+ cfun->machine->frame.ra_fp_offset,
+ reg_names[STACK_POINTER_REGNUM]);
+ }
+ if (!TARGET_NEWABI)
+ fprintf (file,
+ "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n",
+ TARGET_64BIT ? "dsubu" : "subu",
+ reg_names[STACK_POINTER_REGNUM],
+ reg_names[STACK_POINTER_REGNUM],
+ Pmode == DImode ? 16 : 8);
+
+ if (TARGET_LONG_CALLS)
+ fprintf (file, "\tjalr\t%s\n", reg_names[3]);
+ else
+ fprintf (file, "\tjal\t_mcount\n");
+ mips_pop_asm_switch (&mips_noat);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM],
+ reg_names[2]);
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -16129,8 +16378,6 @@ mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE mips_can_eliminate
-#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
-#define TARGET_ASM_TRAMPOLINE_TEMPLATE mips_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT mips_trampoline_init
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 934e0fafa90..282970890f8 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -1311,10 +1311,10 @@ enum mips_code_readable_setting {
#define DWARF_FRAME_REGNUM(REGNO) mips_dwarf_regno[REGNO]
/* The DWARF 2 CFA column which tracks the return address. */
-#define DWARF_FRAME_RETURN_COLUMN (GP_REG_FIRST + 31)
+#define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM
/* Before the prologue, RA lives in r31. */
-#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, GP_REG_FIRST + 31)
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, RETURN_ADDR_REGNUM)
/* Describe how we implement __builtin_eh_return. */
#define EH_RETURN_DATA_REGNO(N) \
@@ -2372,44 +2372,7 @@ typedef struct mips_args {
/* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry. */
-#define FUNCTION_PROFILER(FILE, LABELNO) \
-{ \
- if (TARGET_MIPS16) \
- sorry ("mips16 function profiling"); \
- if (TARGET_LONG_CALLS) \
- { \
- /* For TARGET_LONG_CALLS use $3 for the address of _mcount. */ \
- if (Pmode == DImode) \
- fprintf (FILE, "\tdla\t%s,_mcount\n", reg_names[GP_REG_FIRST + 3]); \
- else \
- fprintf (FILE, "\tla\t%s,_mcount\n", reg_names[GP_REG_FIRST + 3]); \
- } \
- mips_push_asm_switch (&mips_noat); \
- fprintf (FILE, "\tmove\t%s,%s\t\t# save current return address\n", \
- reg_names[GP_REG_FIRST + 1], reg_names[GP_REG_FIRST + 31]); \
- /* _mcount treats $2 as the static chain register. */ \
- if (cfun->static_chain_decl != NULL) \
- fprintf (FILE, "\tmove\t%s,%s\n", reg_names[2], \
- reg_names[STATIC_CHAIN_REGNUM]); \
- if (!TARGET_NEWABI) \
- { \
- fprintf (FILE, \
- "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n", \
- TARGET_64BIT ? "dsubu" : "subu", \
- reg_names[STACK_POINTER_REGNUM], \
- reg_names[STACK_POINTER_REGNUM], \
- Pmode == DImode ? 16 : 8); \
- } \
- if (TARGET_LONG_CALLS) \
- fprintf (FILE, "\tjalr\t%s\n", reg_names[GP_REG_FIRST + 3]); \
- else \
- fprintf (FILE, "\tjal\t_mcount\n"); \
- mips_pop_asm_switch (&mips_noat); \
- /* _mcount treats $2 as the static chain register. */ \
- if (cfun->static_chain_decl != NULL) \
- fprintf (FILE, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM], \
- reg_names[2]); \
-}
+#define FUNCTION_PROFILER(FILE, LABELNO) mips_function_profiler ((FILE))
/* The profiler preserves all interesting registers, including $31. */
#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) false
@@ -2433,14 +2396,15 @@ typedef struct mips_args {
#define EXIT_IGNORE_STACK 1
-/* A C expression for the size in bytes of the trampoline, as an
- integer. */
+/* Trampolines are a block of code followed by two pointers. */
-#define TRAMPOLINE_SIZE (ptr_mode == DImode ? 48 : 36)
+#define TRAMPOLINE_SIZE \
+ (mips_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2)
-/* Alignment required for trampolines, in bits. */
+/* Forcing a 64-bit alignment for 32-bit targets allows us to load two
+ pointers from a single LUI base. */
-#define TRAMPOLINE_ALIGNMENT GET_MODE_BITSIZE (ptr_mode)
+#define TRAMPOLINE_ALIGNMENT 64
/* mips_trampoline_init calls this library function to flush
program and data caches. */
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 19f3ffc7a06..76fc37bd479 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -82,6 +82,7 @@
(UNSPEC_ADDRESS_FIRST 100)
(TLS_GET_TP_REGNUM 3)
+ (RETURN_ADDR_REGNUM 31)
(CPRESTORE_SLOT_REGNUM 76)
(GOT_VERSION_REGNUM 79)
@@ -1878,7 +1879,7 @@
(set_attr "mode" "SI")
(set_attr "length" "12")])
-(define_insn_and_split "<u>mulsidi3_64bit"
+(define_insn "<u>mulsidi3_64bit"
[(set (match_operand:DI 0 "register_operand" "=d")
(mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
(any_extend:DI (match_operand:SI 2 "register_operand" "d"))))
@@ -1886,37 +1887,67 @@
(clobber (match_scratch:DI 4 "=d"))]
"TARGET_64BIT && !TARGET_FIX_R4000"
"#"
- "&& reload_completed"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "ISA_HAS_EXT_INS") (const_int 0))
+ (const_int 16)
+ (const_int 28)))])
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "d_operand"))
+ (any_extend:DI (match_operand:SI 2 "d_operand"))))
+ (clobber (match_operand:TI 3 "hilo_operand"))
+ (clobber (match_operand:DI 4 "d_operand"))]
+ "TARGET_64BIT && !TARGET_FIX_R4000 && ISA_HAS_EXT_INS && reload_completed"
[(set (match_dup 3)
(unspec:TI [(mult:DI (any_extend:DI (match_dup 1))
(any_extend:DI (match_dup 2)))]
UNSPEC_SET_HILO))
- ;; OP4 <- LO, OP0 <- HI
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 0) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
+ ;; OP0 <- LO, OP4 <- HI
+ (set (match_dup 0) (match_dup 5))
+ (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
- ;; Zero-extend OP4.
- (set (match_dup 4)
- (ashift:DI (match_dup 4)
- (const_int 32)))
- (set (match_dup 4)
- (lshiftrt:DI (match_dup 4)
- (const_int 32)))
+ (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 32))
+ (match_dup 4))]
+ { operands[5] = gen_rtx_REG (DImode, LO_REGNUM); })
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "d_operand"))
+ (any_extend:DI (match_operand:SI 2 "d_operand"))))
+ (clobber (match_operand:TI 3 "hilo_operand"))
+ (clobber (match_operand:DI 4 "d_operand"))]
+ "TARGET_64BIT && !TARGET_FIX_R4000 && !ISA_HAS_EXT_INS && reload_completed"
+ [(set (match_dup 3)
+ (unspec:TI [(mult:DI (any_extend:DI (match_dup 1))
+ (any_extend:DI (match_dup 2)))]
+ UNSPEC_SET_HILO))
+
+ ;; OP0 <- LO, OP4 <- HI
+ (set (match_dup 0) (match_dup 5))
+ (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
- ;; Shift OP0 into place.
+ ;; Zero-extend OP0.
(set (match_dup 0)
(ashift:DI (match_dup 0)
(const_int 32)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0)
+ (const_int 32)))
+
+ ;; Shift OP4 into place.
+ (set (match_dup 4)
+ (ashift:DI (match_dup 4)
+ (const_int 32)))
;; OR the two halves together
(set (match_dup 0)
(ior:DI (match_dup 0)
(match_dup 4)))]
- { operands[5] = gen_rtx_REG (DImode, LO_REGNUM); }
- [(set_attr "type" "imul")
- (set_attr "mode" "SI")
- (set_attr "length" "24")])
+ { operands[5] = gen_rtx_REG (DImode, LO_REGNUM); })
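+
+;; Taken together: the ISA_HAS_EXT_INS split above combines the two
+;; halves with a single insert, 4 instructions in all (length 16),
+;; while the fallback split needs the zero-extending shift pair plus
+;; the final IOR, 7 instructions (length 28).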
(define_insn "<u>mulsidi3_64bit_hilo"
[(set (match_operand:TI 0 "register_operand" "=x")
@@ -4011,7 +4042,7 @@
(define_insn "*mov<mode>_ra"
[(set (match_operand:GPR 0 "stack_operand" "=m")
- (reg:GPR 31))]
+ (reg:GPR RETURN_ADDR_REGNUM))]
"TARGET_MIPS16"
"<store>\t$31,%0"
[(set_attr "move_type" "store")
@@ -4938,7 +4969,7 @@
(define_insn "clear_hazard_<mode>"
[(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
- (clobber (reg:P 31))]
+ (clobber (reg:P RETURN_ADDR_REGNUM))]
"ISA_HAS_SYNCI"
{
return "%(%<bal\t1f\n"
@@ -6123,7 +6154,7 @@
(define_insn_and_split "call_internal"
[(call (mem:SI (match_operand 0 "call_insn_operand" "c,S"))
(match_operand 1 "" ""))
- (clobber (reg:SI 31))]
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
""
{ return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, 1); }
"reload_completed && TARGET_SPLIT_CALLS && (operands[2] = insn)"
@@ -6137,7 +6168,7 @@
(define_insn "call_split"
[(call (mem:SI (match_operand 0 "call_insn_operand" "cS"))
(match_operand 1 "" ""))
- (clobber (reg:SI 31))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
{ return MIPS_CALL ("jal", operands, 0, 1); }
@@ -6151,7 +6182,7 @@
[(call (mem:SI (match_operand 0 "const_call_insn_operand"))
(match_operand 1))
(const_int 1)
- (clobber (reg:SI 31))]
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
""
{ return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, -1); }
"reload_completed && TARGET_SPLIT_CALLS && (operands[2] = insn)"
@@ -6167,7 +6198,7 @@
[(call (mem:SI (match_operand 0 "const_call_insn_operand"))
(match_operand 1))
(const_int 1)
- (clobber (reg:SI 31))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
{ return MIPS_CALL ("jal", operands, 0, -1); }
@@ -6190,7 +6221,7 @@
[(set (match_operand 0 "register_operand" "")
(call (mem:SI (match_operand 1 "call_insn_operand" "c,S"))
(match_operand 2 "" "")))
- (clobber (reg:SI 31))]
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
""
{ return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
"reload_completed && TARGET_SPLIT_CALLS && (operands[3] = insn)"
@@ -6207,7 +6238,7 @@
[(set (match_operand 0 "register_operand" "")
(call (mem:SI (match_operand 1 "call_insn_operand" "cS"))
(match_operand 2 "" "")))
- (clobber (reg:SI 31))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
{ return MIPS_CALL ("jal", operands, 1, 2); }
@@ -6219,7 +6250,7 @@
(call (mem:SI (match_operand 1 "const_call_insn_operand"))
(match_operand 2)))
(const_int 1)
- (clobber (reg:SI 31))]
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
""
{ return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, -1); }
"reload_completed && TARGET_SPLIT_CALLS && (operands[3] = insn)"
@@ -6237,7 +6268,7 @@
(call (mem:SI (match_operand 1 "const_call_insn_operand"))
(match_operand 2)))
(const_int 1)
- (clobber (reg:SI 31))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
{ return MIPS_CALL ("jal", operands, 1, -1); }
@@ -6251,7 +6282,7 @@
(set (match_operand 3 "register_operand" "")
(call (mem:SI (match_dup 1))
(match_dup 2)))
- (clobber (reg:SI 31))]
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
""
{ return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
"reload_completed && TARGET_SPLIT_CALLS && (operands[4] = insn)"
@@ -6271,7 +6302,7 @@
(set (match_operand 3 "register_operand" "")
(call (mem:SI (match_dup 1))
(match_dup 2)))
- (clobber (reg:SI 31))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
(clobber (reg:SI 28))]
"TARGET_SPLIT_CALLS"
{ return MIPS_CALL ("jal", operands, 1, 2); }
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 8462e4646d6..188d5e17006 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -208,6 +208,10 @@ mlong64
Target Report RejectNegative Mask(LONG64)
Use a 64-bit long type
+mmcount-ra-address
+Target Report Var(TARGET_MCOUNT_RA_ADDRESS)
+Pass the address of the ra save location to _mcount in $12
+
mmemcpy
Target Report Mask(MEMCPY)
Don't optimize block moves
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index e1cb4573688..7430dd32b78 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -119,6 +119,10 @@
(and (match_code "reg")
(match_test "REGNO (op) == LO_REGNUM")))
+(define_predicate "hilo_operand"
+ (and (match_code "reg")
+ (match_test "MD_REG_P (REGNO (op))")))
+
(define_predicate "fcc_reload_operand"
(and (match_code "reg,subreg")
(match_test "ST_REG_P (true_regnum (op))")))
diff --git a/gcc/config/mips/sdemtk.h b/gcc/config/mips/sdemtk.h
index 27dab06f298..a9bb85e82b6 100644
--- a/gcc/config/mips/sdemtk.h
+++ b/gcc/config/mips/sdemtk.h
@@ -101,7 +101,7 @@ extern void mips_sync_icache (void *beg, unsigned long len);
/* MIPS16 code passes saved $ra in $v1 instead of $at. */ \
fprintf (FILE, "\tmove\t%s,%s\n", \
reg_names[GP_REG_FIRST + (TARGET_MIPS16 ? 3 : 1)], \
- reg_names[GP_REG_FIRST + 31]); \
+ reg_names[RETURN_ADDR_REGNUM]); \
fprintf (FILE, "\tjal\t_mcount\n"); \
mips_pop_asm_switch (&mips_noat); \
/* _mcount treats $2 as the static chain register. */ \
@@ -112,4 +112,4 @@ extern void mips_sync_icache (void *beg, unsigned long len);
/* ...nor does the call sequence preserve $31. */
#undef MIPS_SAVE_REG_FOR_PROFILING_P
-#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) ((REGNO) == GP_REG_FIRST + 31)
+#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) ((REGNO) == RETURN_ADDR_REGNUM)
diff --git a/gcc/config/mn10300/mn10300-protos.h b/gcc/config/mn10300/mn10300-protos.h
index ae4728ae0cb..47488c9b0ba 100644
--- a/gcc/config/mn10300/mn10300-protos.h
+++ b/gcc/config/mn10300/mn10300-protos.h
@@ -37,12 +37,13 @@ extern int symbolic_operand (rtx, enum machine_mode);
extern int impossible_plus_operand (rtx, enum machine_mode);
extern bool mn10300_wide_const_load_uses_clr (rtx operands[2]);
+
+extern bool mn10300_function_value_regno_p (const unsigned int);
#endif /* RTX_CODE */
#ifdef TREE_CODE
extern struct rtx_def *function_arg (CUMULATIVE_ARGS *,
enum machine_mode, tree, int);
-extern rtx mn10300_function_value (const_tree, const_tree, int);
#endif /* TREE_CODE */
extern void expand_prologue (void);
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index 1a0eb37bbde..8f69dd0d995 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -86,6 +86,8 @@ static unsigned int mn10300_case_values_threshold (void);
static void mn10300_encode_section_info (tree, rtx, int);
static void mn10300_asm_trampoline_template (FILE *);
static void mn10300_trampoline_init (rtx, tree, rtx);
+static rtx mn10300_function_value (const_tree, const_tree, bool);
+static rtx mn10300_libcall_value (enum machine_mode, const_rtx);
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -139,6 +141,11 @@ static void mn10300_trampoline_init (rtx, tree, rtx);
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT mn10300_trampoline_init
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE mn10300_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE mn10300_libcall_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_HANDLE_OPTION. */
@@ -1624,8 +1631,10 @@ mn10300_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
we only return the PARALLEL for outgoing values; we do not want
callers relying on this extra copy. */
-rtx
-mn10300_function_value (const_tree valtype, const_tree func, int outgoing)
+static rtx
+mn10300_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing)
{
rtx rv;
enum machine_mode mode = TYPE_MODE (valtype);
@@ -1649,6 +1658,23 @@ mn10300_function_value (const_tree valtype, const_tree func, int outgoing)
return rv;
}
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+mn10300_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, FIRST_DATA_REGNUM);
+}
+
+/* Implements FUNCTION_VALUE_REGNO_P. */
+
+bool
+mn10300_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == FIRST_DATA_REGNUM || regno == FIRST_ADDRESS_REGNUM);
+}
+
/* Output a tst insn. */
const char *
output_tst (rtx operand, rtx insn)
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
index d459387edfc..c732aa07180 100644
--- a/gcc/config/mn10300/mn10300.h
+++ b/gcc/config/mn10300/mn10300.h
@@ -564,25 +564,7 @@ struct cum_arg {int nbytes; };
#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
function_arg (&CUM, MODE, TYPE, NAMED)
-/* Define how to find the value returned by a function.
- VALTYPE is the data type of the value (as a tree).
- If the precise function being called is known, FUNC is its FUNCTION_DECL;
- otherwise, FUNC is 0. */
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- mn10300_function_value (VALTYPE, FUNC, 0)
-#define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC) \
- mn10300_function_value (VALTYPE, FUNC, 1)
-
-/* Define how to find the value returned by a library function
- assuming the value has mode MODE. */
-
-#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, FIRST_DATA_REGNUM)
-
-/* 1 if N is a possible register number for a function value. */
-
-#define FUNCTION_VALUE_REGNO_P(N) \
- ((N) == FIRST_DATA_REGNUM || (N) == FIRST_ADDRESS_REGNUM)
+#define FUNCTION_VALUE_REGNO_P(N) mn10300_function_value_regno_p (N)
#define DEFAULT_PCC_STRUCT_RETURN 0
@@ -618,10 +600,6 @@ struct cum_arg {int nbytes; };
? gen_rtx_MEM (Pmode, arg_pointer_rtx) \
: (rtx) 0)
-/* 1 if X is an rtx for a constant that is a valid address. */
-
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Maximum number of registers that can appear in a valid memory address. */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h
index f1b77eaf0c2..384bce4a986 100644
--- a/gcc/config/moxie/moxie.h
+++ b/gcc/config/moxie/moxie.h
@@ -475,10 +475,6 @@ enum reg_class
an immediate operand on the target machine. */
#define LEGITIMATE_CONSTANT_P(X) 1
-/* A C expression that is 1 if the RTX X is a constant which is a
- valid address. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
-
/* A number, the maximum number of registers that can appear in a
valid memory address. */
#define MAX_REGS_PER_ADDRESS 1
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 20f64449097..d10a40d55e1 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -7120,17 +7120,6 @@
operands[0] = index;
}
- /* In 64bit mode we must make sure to wipe the upper bits of the register
- just in case the addition overflowed or we had random bits in the
- high part of the register. */
- if (TARGET_64BIT)
- {
- rtx index = gen_reg_rtx (DImode);
-
- emit_insn (gen_extendsidi2 (index, operands[0]));
- operands[0] = gen_rtx_SUBREG (SImode, index, 4);
- }
-
if (!INT_5_BITS (operands[2]))
operands[2] = force_reg (SImode, operands[2]);
@@ -7149,6 +7138,17 @@
emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4]));
}
+ /* In 64bit mode we must make sure to wipe the upper bits of the register
+ just in case the addition overflowed or we had random bits in the
+ high part of the register. */
+ if (TARGET_64BIT)
+ {
+ rtx index = gen_reg_rtx (DImode);
+
+ emit_insn (gen_extendsidi2 (index, operands[0]));
+ operands[0] = index;
+ }
+
if (TARGET_BIG_SWITCH)
{
if (TARGET_64BIT)
@@ -7209,8 +7209,7 @@
;;; 64-bit code, 32-bit relative branch table.
(define_insn "casesi64p"
[(set (pc) (mem:DI (plus:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 0 "register_operand" "r"))
+ (mult:DI (match_operand:DI 0 "register_operand" "r")
(const_int 8))
(label_ref (match_operand 1 "" "")))))
(clobber (match_scratch:DI 2 "=&r"))
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
index 8997612ba5a..fe8c9e8aed3 100644
--- a/gcc/config/pdp11/pdp11.h
+++ b/gcc/config/pdp11/pdp11.h
@@ -594,10 +594,6 @@ extern int may_call_alloca;
#define MAX_REGS_PER_ADDRESS 1
-/* Recognize any constant value that is a valid address. */
-
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Nonzero if the constant value X is a legitimate general operand.
It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h
index 44559f22333..4d0c96278e0 100644
--- a/gcc/config/picochip/picochip.h
+++ b/gcc/config/picochip/picochip.h
@@ -471,8 +471,6 @@ extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Addressing Modes */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
-
#define MAX_REGS_PER_ADDRESS 1
/* Legitimize reload address tries machine dependent means of
diff --git a/gcc/config/rs6000/40x.md b/gcc/config/rs6000/40x.md
index e11c6539f68..eaf1222eca4 100644
--- a/gcc/config/rs6000/40x.md
+++ b/gcc/config/rs6000/40x.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM PowerPC 403 and PowerPC 405 processors.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -38,7 +38,7 @@
(define_insn_reservation "ppc403-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc403,ppc405"))
"iu_40x")
diff --git a/gcc/config/rs6000/440.md b/gcc/config/rs6000/440.md
index b146222ac50..b329e7897cd 100644
--- a/gcc/config/rs6000/440.md
+++ b/gcc/config/rs6000/440.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM PowerPC 440 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -55,7 +55,7 @@
(define_insn_reservation "ppc440-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
- trap,var_shift_rotate,cntlz,exts")
+ trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc440"))
"ppc440_issue,ppc440_i_pipe|ppc440_j_pipe")
diff --git a/gcc/config/rs6000/476.md b/gcc/config/rs6000/476.md
new file mode 100644
index 00000000000..3f50bafa03c
--- /dev/null
+++ b/gcc/config/rs6000/476.md
@@ -0,0 +1,142 @@
+;; Scheduling description for IBM PowerPC 476 processor.
+;; Copyright (C) 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Peter Bergner (bergner@vnet.ibm.com).
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; PPC476 Embedded PowerPC controller
+;; 3 issue (476) / 4 issue (476fp)
+;;
+;; i_pipe - complex integer / compare
+;; lj_pipe - load-store / simple integer arithmetic
+;; b_pipe - branch pipe
+;; f_pipe - floating point arithmetic
+
+(define_automaton "ppc476_core,ppc476_apu")
+
+(define_cpu_unit "ppc476_i_pipe,ppc476_lj_pipe,ppc476_b_pipe" "ppc476_core")
+(define_cpu_unit "ppc476_issue_fp,ppc476_f_pipe" "ppc476_apu")
+(define_cpu_unit "ppc476_issue_0,ppc476_issue_1,ppc476_issue_2" "ppc476_core")
+
+(define_reservation "ppc476_issue" "ppc476_issue_0|ppc476_issue_1|ppc476_issue_2")
+(define_reservation "ppc476_issue2" "ppc476_issue_0+ppc476_issue_1\
+ |ppc476_issue_0+ppc476_issue_2\
+ |ppc476_issue_1+ppc476_issue_2")
+(define_reservation "ppc476_issue3" "ppc476_issue_0+ppc476_issue_1+ppc476_issue_2")
+
+(define_insn_reservation "ppc476-load" 4
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-store" 4
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpstore" 4
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-simple-integer" 1
+ (and (eq_attr "type" "integer,insert_word,var_shift_rotate,exts,shift")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-complex-integer" 1
+ (and (eq_attr "type" "cmp,cr_logical,delayed_cr,cntlz,isel,isync,sync,trap")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-compare" 4
+ (and (eq_attr "type" "compare,delayed_compare,fast_compare,mfcr,mfcrf,\
+ mtcr,mfjmpr,mtjmpr,var_delayed_compare")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-imul" 4
+ (and (eq_attr "type" "imul,imul_compare,imul2,imul3")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-idiv" 11
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe*11")
+
+(define_insn_reservation "ppc476-branch" 1
+ (and (eq_attr "type" "branch,jmpreg")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_b_pipe")
+
+(define_insn_reservation "ppc476-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue2,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue3,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpcompare" 6
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue+ppc476_issue_fp,\
+ ppc476_f_pipe+ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,\
+ ppc476_f_pipe")
+
+(define_insn_reservation "ppc476-sdiv" 19
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,
+ ppc476_f_pipe*19")
+
+(define_insn_reservation "ppc476-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,\
+ ppc476_f_pipe*33")
+
diff --git a/gcc/config/rs6000/603.md b/gcc/config/rs6000/603.md
index c5fea314819..a042729a1da 100644
--- a/gcc/config/rs6000/603.md
+++ b/gcc/config/rs6000/603.md
@@ -1,5 +1,5 @@
;; Scheduling description for PowerPC 603 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -59,7 +59,7 @@
(define_insn_reservation "ppc603-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc603"))
"iu_603")
diff --git a/gcc/config/rs6000/6xx.md b/gcc/config/rs6000/6xx.md
index 88c15ae39ec..b0de9731525 100644
--- a/gcc/config/rs6000/6xx.md
+++ b/gcc/config/rs6000/6xx.md
@@ -1,6 +1,6 @@
;; Scheduling description for PowerPC 604, PowerPC 604e, PowerPC 620,
;; and PowerPC 630 processors.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -74,7 +74,7 @@
(define_insn_reservation "ppc604-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
"iu1_6xx|iu2_6xx")
diff --git a/gcc/config/rs6000/7450.md b/gcc/config/rs6000/7450.md
index 6f2775744d4..ccaa3b20da3 100644
--- a/gcc/config/rs6000/7450.md
+++ b/gcc/config/rs6000/7450.md
@@ -1,5 +1,5 @@
;; Scheduling description for Motorola PowerPC 7450 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -75,7 +75,7 @@
(define_insn_reservation "ppc7450-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
- trap,var_shift_rotate,cntlz,exts")
+ trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc7450"))
"ppc7450_du,iu1_7450|iu2_7450|iu3_7450")
diff --git a/gcc/config/rs6000/7xx.md b/gcc/config/rs6000/7xx.md
index 0129048c07c..edbde75c22a 100644
--- a/gcc/config/rs6000/7xx.md
+++ b/gcc/config/rs6000/7xx.md
@@ -1,5 +1,5 @@
;; Scheduling description for Motorola PowerPC 750 and PowerPC 7400 processors.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -65,7 +65,7 @@
(define_insn_reservation "ppc750-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
- trap,var_shift_rotate,cntlz,exts")
+ trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc750,ppc7400"))
"ppc750_du,iu1_7xx|iu2_7xx")
diff --git a/gcc/config/rs6000/8540.md b/gcc/config/rs6000/8540.md
index 2d44b3af94b..4096dff432c 100644
--- a/gcc/config/rs6000/8540.md
+++ b/gcc/config/rs6000/8540.md
@@ -1,5 +1,5 @@
;; Pipeline description for Motorola PowerPC 8540 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -86,7 +86,7 @@
(define_insn_reservation "ppc8540_su" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,cmp,compare,\
delayed_compare,var_delayed_compare,fast_compare,\
- shift,trap,var_shift_rotate,cntlz,exts")
+ shift,trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppc8540"))
"ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
diff --git a/gcc/config/rs6000/a2.md b/gcc/config/rs6000/a2.md
new file mode 100644
index 00000000000..851d8949ff7
--- /dev/null
+++ b/gcc/config/rs6000/a2.md
@@ -0,0 +1,134 @@
+;; Scheduling description for PowerPC A2 processors.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Ben Elliston (bje@au.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppca2")
+
+;; CPU units
+
+;; The multiplier pipeline.
+(define_cpu_unit "mult" "ppca2")
+
+;; The auxiliary processor unit (FP/vector unit).
+(define_cpu_unit "axu" "ppca2")
+
+;; D.4.6
+;; Some peculiarities for certain SPRs
+
+(define_insn_reservation "ppca2-mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.4.8
+(define_insn_reservation "ppca2-imul" 1
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; FIXME: latency and multiplier reservation for 64-bit multiply?
+(define_insn_reservation "ppca2-lmul" 6
+ (and (eq_attr "type" "lmul,lmul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "mult*3")
+
+;; D.4.9
+(define_insn_reservation "ppca2-idiv" 32
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*32")
+
+(define_insn_reservation "ppca2-ldiv" 65
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*65")
+
+;; D.4.13
+(define_insn_reservation "ppca2-load" 5
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.8.1
+(define_insn_reservation "ppca2-fp" 6
+ (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only).
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.4
+(define_insn_reservation "ppca2-fp-load" 6
+ (and (eq_attr "type" "fpload,fpload_u,fpload_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.5
+(define_insn_reservation "ppca2-fp-store" 2
+ (and (eq_attr "type" "fpstore,fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.6
+(define_insn_reservation "ppca2-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.7
+;;
+;; Instructions from the same thread that follow a floating-point
+;; divide cannot be executed until the divide has completed.  Since
+;; there is nothing else we can do, the thread simply has to stall.
+
+(define_insn_reservation "ppca2-ddiv" 72
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-sdiv" 59
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.8
+;;
+;; Instructions from the same thread that follow a floating-point
+;; square root cannot be executed until the square root has completed.
+;; As with divides, the thread simply has to stall.
+
+(define_insn_reservation "ppca2-dsqrt" 69
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-ssqrt" 65
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 53b1054d200..6fbb7cdcdac 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1,5 +1,5 @@
;; AltiVec patterns.
-;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
;; Free Software Foundation, Inc.
;; Contributed by Aldy Hernandez (aldy@quesejoda.com)
diff --git a/gcc/config/rs6000/cell.md b/gcc/config/rs6000/cell.md
index 3fffd2740f9..dac9da94320 100644
--- a/gcc/config/rs6000/cell.md
+++ b/gcc/config/rs6000/cell.md
@@ -1,5 +1,5 @@
;; Scheduling description for cell processor.
-;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009
;; Free Software Foundation, Inc.
;; Contributed by Sony Computer Entertainment, Inc.,
@@ -157,7 +157,7 @@
;; Integer latency is 2 cycles
(define_insn_reservation "cell-integer" 2
(and (eq_attr "type" "integer,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "cell"))
"slot01,fxu_cell")
diff --git a/gcc/config/rs6000/e300c2c3.md b/gcc/config/rs6000/e300c2c3.md
index 31bf14ce314..3462a209f74 100644
--- a/gcc/config/rs6000/e300c2c3.md
+++ b/gcc/config/rs6000/e300c2c3.md
@@ -1,5 +1,5 @@
;; Pipeline description for Motorola PowerPC e300c3 core.
-;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
;; Contributed by Edmar Wienskoski (edmar@freescale.com)
;;
;; This file is part of GCC.
@@ -90,7 +90,7 @@
;; Other one cycle IU insns
(define_insn_reservation "ppce300c3_iu" 1
- (and (eq_attr "type" "integer,insert_word")
+ (and (eq_attr "type" "integer,insert_word,isel")
(ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
"ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0+ppce300c3_retire")
diff --git a/gcc/config/rs6000/e500mc.md b/gcc/config/rs6000/e500mc.md
index 86434f95fe1..99a4b80ecf6 100644
--- a/gcc/config/rs6000/e500mc.md
+++ b/gcc/config/rs6000/e500mc.md
@@ -72,7 +72,7 @@
(define_insn_reservation "e500mc_su" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,cmp,compare,\
delayed_compare,var_delayed_compare,fast_compare,\
- shift,trap,var_shift_rotate,cntlz,exts")
+ shift,trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "ppce500mc"))
"e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire")
diff --git a/gcc/config/rs6000/mpc.md b/gcc/config/rs6000/mpc.md
index a839f936648..415c6887232 100644
--- a/gcc/config/rs6000/mpc.md
+++ b/gcc/config/rs6000/mpc.md
@@ -1,5 +1,5 @@
;; Scheduling description for Motorola PowerPC processor cores.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -43,7 +43,7 @@
(define_insn_reservation "mpccore-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "mpccore"))
"iu_mpc")
diff --git a/gcc/config/rs6000/option-defaults.h b/gcc/config/rs6000/option-defaults.h
index 682add7605d..7e117d731da 100644
--- a/gcc/config/rs6000/option-defaults.h
+++ b/gcc/config/rs6000/option-defaults.h
@@ -50,15 +50,15 @@
/* Support for a compile-time default CPU, et cetera. The rules are:
--with-cpu is ignored if -mcpu is specified; likewise --with-cpu-32
and --with-cpu-64.
- --with-tune is ignored if -mtune is specified; likewise --with-tune-32
- and --with-tune-64.
+ --with-tune is ignored if -mtune or -mcpu is specified; likewise
+ --with-tune-32 and --with-tune-64.
--with-float is ignored if -mhard-float or -msoft-float are
- specified. */
+ specified. */
#define OPTION_DEFAULT_SPECS \
+ {"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
{"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \
{"cpu_32", "%{" OPT_ARCH32 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
{"cpu_64", "%{" OPT_ARCH64 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
- {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
- {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
- {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
{"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }
diff --git a/gcc/config/rs6000/power4.md b/gcc/config/rs6000/power4.md
index 0214c98b139..60dbffd58c9 100644
--- a/gcc/config/rs6000/power4.md
+++ b/gcc/config/rs6000/power4.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM Power4 and PowerPC 970 processors.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -188,7 +188,7 @@
; Integer latency is 2 cycles
(define_insn_reservation "power4-integer" 2
(and (eq_attr "type" "integer,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "power4"))
"iq_power4")
diff --git a/gcc/config/rs6000/power5.md b/gcc/config/rs6000/power5.md
index 83ffabcfb3a..b6db0931219 100644
--- a/gcc/config/rs6000/power5.md
+++ b/gcc/config/rs6000/power5.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM POWER5 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -142,7 +142,7 @@
; Integer latency is 2 cycles
(define_insn_reservation "power5-integer" 2
(and (eq_attr "type" "integer,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "power5"))
"iq_power5")
diff --git a/gcc/config/rs6000/power6.md b/gcc/config/rs6000/power6.md
index ba6524cfa65..8d54c812963 100644
--- a/gcc/config/rs6000/power6.md
+++ b/gcc/config/rs6000/power6.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM POWER6 processor.
-;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
;; Contributed by Peter Steinmetz (steinmtz@us.ibm.com)
;;
;; This file is part of GCC.
@@ -201,6 +201,11 @@
(eq_attr "cpu" "power6"))
"FXU_power6")
+(define_insn_reservation "power6-isel" 1
+ (and (eq_attr "type" "isel")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
(define_insn_reservation "power6-exts" 1
(and (eq_attr "type" "exts")
(eq_attr "cpu" "power6"))
diff --git a/gcc/config/rs6000/power7.md b/gcc/config/rs6000/power7.md
index 3b6a95e284e..148a7a52a8a 100644
--- a/gcc/config/rs6000/power7.md
+++ b/gcc/config/rs6000/power7.md
@@ -150,7 +150,7 @@
; FX Unit
(define_insn_reservation "power7-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,exts")
+ var_shift_rotate,exts,isel")
(eq_attr "cpu" "power7"))
"DU_power7,FXU_power7")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index d03cce6f8a2..2d8a2a8d5d1 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1,5 +1,5 @@
;; Predicate definitions for POWER and PowerPC.
-;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
diff --git a/gcc/config/rs6000/rios1.md b/gcc/config/rs6000/rios1.md
index be2262d1281..9ad9ce3e161 100644
--- a/gcc/config/rs6000/rios1.md
+++ b/gcc/config/rs6000/rios1.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM POWER processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -52,7 +52,7 @@
(define_insn_reservation "rios1-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
- trap,var_shift_rotate,cntlz,exts")
+ trap,var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "rios1,ppc601"))
"iu_rios1")
diff --git a/gcc/config/rs6000/rios2.md b/gcc/config/rs6000/rios2.md
index 24fbc15b9ad..96633af2f8e 100644
--- a/gcc/config/rs6000/rios2.md
+++ b/gcc/config/rs6000/rios2.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM Power2 processor.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -40,7 +40,7 @@
(define_insn_reservation "rios2-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "rios2"))
"iu1_rios2|iu2_rios2")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
new file mode 100644
index 00000000000..f5ad34882d4
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -0,0 +1,990 @@
+/* Builtin functions for rs6000/powerpc.
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+ Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Before including this file, two macros must be defined:
+ RS6000_BUILTIN -- 2 arguments: the enum name and its classification
+ RS6000_BUILTIN_EQUATE -- 2 arguments: the enum name and its value */
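+
+/* For illustration only (the sentinel name RS6000_BUILTIN_COUNT below
+   is hypothetical), a consumer would typically expand this file with
+   the usual X-macro pattern:
+
+     #define RS6000_BUILTIN(NAME, CLASS) NAME,
+     #define RS6000_BUILTIN_EQUATE(NAME, VALUE) NAME = VALUE,
+     enum rs6000_builtins {
+     #include "rs6000-builtin.def"
+       RS6000_BUILTIN_COUNT
+     };
+     #undef RS6000_BUILTIN
+     #undef RS6000_BUILTIN_EQUATE
+
+   so that each entry here becomes one enumerator.  */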
+
+/* AltiVec builtins. */
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4si, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4si, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_8hi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_8hi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_16qi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_16qi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4sf, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4sf, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDCUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAND, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VANDC, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCFUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCFSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCTSXS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCTUXS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEXPTEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VLOGEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMADDFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMHRADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMLADDUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMMBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMSHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUB_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULESB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUH_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULESH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUB_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUH_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VNMSUBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VNOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHUM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSWSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSHUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VREFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIN, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRSQRTEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSL, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLO, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRO, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBCUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4UBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4SBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4SHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM2SWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUMSWS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VXOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MTVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MFVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSSALL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVSL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVSR, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEBX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEHX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEWX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVLX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVLXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVRX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVRXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEBX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEHX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEWX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVLX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVLXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVRX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVRXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPBFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MASK_FOR_LOAD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MASK_FOR_STORE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_COPYSIGN_V4SF, RS6000_BTC_CONST)
+
+/* Altivec overloaded builtins. */
+/* For now, don't set the classification for overloaded functions.
+ The function should be converted to the type specific instruction
+ before we get to the point about classifying the builtin type. */
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQ_P, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(ALTIVEC_BUILTIN_OVERLOADED_FIRST,
+ ALTIVEC_BUILTIN_VCMPEQ_P)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGT_P, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGE_P, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ABSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_AND, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ANDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_AVG, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXTRACT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CEIL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPGE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPLE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_COPYSIGN, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTF, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXPTE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_FLOOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LDE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LDL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LOGE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEBX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVLX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVLXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVRX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVRXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVSL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVSR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MAX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MERGEH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MERGEL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MIN, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MLADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MPERM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MSUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MSUMS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MTVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MULE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MULO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NEARBYINT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_OR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACK, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKSU, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PERM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RINT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ROUND, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRTE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SEL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S16, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S32, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S8, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U16, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U32, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U8, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SQRT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRA, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEBX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVLX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVLXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVRX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVRXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUBC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUM2S, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUM4S, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUMS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_TRUNC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_UNPACKH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_UNPACKL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUWM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCFSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCFUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMMBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMSHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULESB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULESH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULEUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULEUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSHSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSHUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSWSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSWUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUHUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUHUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUWUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUWUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUWM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4SBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4SHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4UBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_XOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STEP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PROMOTE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INSERT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLATS, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(ALTIVEC_BUILTIN_OVERLOADED_LAST,
+ ALTIVEC_BUILTIN_VEC_SPLATS)
+
+/* SPE builtins. */
+RS6000_BUILTIN(SPE_BUILTIN_EVADDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVAND, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVANDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVDIVWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVDIVWU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEQV, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSDIV, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSMUL, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDDX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHESPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOSSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOUSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWWSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGEHI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGEHILO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGELO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGELOHI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNAND, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVORC, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDDX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHOX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWOX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVXOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCNTLSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCNTLZW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEXTSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEXTSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFSI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFUF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFUI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSIZ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUIZ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSNABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSNEG, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMRA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNEG, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRNDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDIW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHESPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOSSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOUSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWWSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRLWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSLWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWIS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWIU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBIFW, RS6000_BTC_MISC)
+
+/* Compares. */
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPGTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPGTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPLTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPLTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTLT, RS6000_BTC_MISC)
+
+/* EVSEL compares. */
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPGTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPGTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPLTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPLTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTLT, RS6000_BTC_MISC)
+
+RS6000_BUILTIN(SPE_BUILTIN_EVSPLATFI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSPLATI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSMAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUSIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUSIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_MTSPEFSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_MFSPEFSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_BRINC, RS6000_BTC_MISC)
+
+/* PAIRED builtins. */
+RS6000_BUILTIN(PAIRED_BUILTIN_DIVV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_ABSV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NEGV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SQRTV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_ADDV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUBV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_RESV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NMADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NABSV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUM0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUM1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULS0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULS1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE00, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE01, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE10, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE11, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADDS0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADDS1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_STX, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_LX, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SELV2SF4, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_CMPU0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_CMPU1, RS6000_BTC_MISC)
+
+/* VSX builtins. */
+RS6000_BUILTIN(VSX_BUILTIN_LXSDX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVDSX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXSDX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_XSABSDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCMPODP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCMPUDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVSPDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVSXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVUXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSDIVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMADDADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMADDMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMAXDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMINDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMOVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMSUBADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMSUBMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMULDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNABSDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNEGDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMADDADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMADDMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMSUBADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMSUBMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSREDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRSQRTEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSSQRTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_CPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CPSGNSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSTDIVDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTDIVDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTSQRTDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTSQRTDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVABSDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVABSSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQSP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGESP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTSP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVCPSGNSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXDS_UNS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXWDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXWSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDDP_UNS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXWDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXWSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVDIVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVDIVSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMAXDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMAXSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMINDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMINSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMULDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMULSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNABSDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNABSSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNEGDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNEGSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVREDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSQRTEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSQRTESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSQRTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSQRTSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVSP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVSP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTSP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTSP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CONCAT_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CONCAT_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SET_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SET_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SPLAT_2DF, RS6000_BTC_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_SPLAT_2DI, RS6000_BTC_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGHW_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGHW_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGLW_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGLW_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_INIT_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_INIT_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_SET_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_SET_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_EXT_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_EXT_V2DI, RS6000_BTC_CONST)
+
+/* VSX overloaded builtins; add the overloaded functions not present in
+ Altivec. */
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MUL, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_FIRST,
+ VSX_BUILTIN_VEC_MUL)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_NMADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_DIV, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXMRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXMRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXPERMDI, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSLDWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_LAST,
+ VSX_BUILTIN_VEC_XXSPLTW)
+
+/* Combined VSX/Altivec builtins. */
+RS6000_BUILTIN(VECTOR_BUILTIN_FLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_FIX_V4SF_V4SI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_FIXUNS_V4SF_V4SI, RS6000_BTC_FP_PURE)
+
+/* Power7 builtins that aren't VSX instructions. */
+RS6000_BUILTIN(POWER7_BUILTIN_BPERMD, RS6000_BTC_CONST)
+
+/* Miscellaneous builtins. */
+RS6000_BUILTIN(RS6000_BUILTIN_RECIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RECIPF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RSQRTF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_BSWAP_HI, RS6000_BTC_CONST)
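The listing above is consumed as an X-macro file: rs6000-builtin.def carries one RS6000_BUILTIN or RS6000_BUILTIN_EQUATE invocation per builtin and is included several times, each including file redefining the two macros to the expansion it needs — the rs6000.c hunk below builds the builtin_classify table this way, and the rs6000.h hunk rebuilds the rs6000_builtins enum from the same list. A minimal sketch of the pattern, using hypothetical names (my-builtin.def, MY_BUILTIN, my_btc):

/* my-builtin.def -- one line per builtin, deliberately no include guard:
   MY_BUILTIN (MY_BUILTIN_FOO, MY_BTC_CONST)
   MY_BUILTIN (MY_BUILTIN_BAR, MY_BTC_PURE)  */

enum my_btc { MY_BTC_CONST, MY_BTC_PURE };

/* First expansion: an enum of the builtin names.  */
#define MY_BUILTIN(NAME, TYPE) NAME,
enum my_builtins {
#include "my-builtin.def"
  MY_BUILTIN_COUNT
};
#undef MY_BUILTIN

/* Second expansion: a parallel table of the classifications.  */
#define MY_BUILTIN(NAME, TYPE) TYPE,
static const enum my_btc my_classify[MY_BUILTIN_COUNT] = {
#include "my-builtin.def"
};
#undef MY_BUILTIN

Because both views expand from the same .def list, the enum and the classification table cannot drift out of sync.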
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cafe2b318aa..88649ea0735 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -130,6 +130,8 @@ typedef struct GTY(()) machine_function
64-bits wide and is allocated early enough so that the offset
does not overflow the 16-bit load/store offset field. */
rtx sdmode_stack_slot;
+ /* True if any VSX or ALTIVEC vector type was used. */
+ bool vsx_or_altivec_used_p;
} machine_function;
/* Target cpu type */
@@ -511,6 +513,25 @@ struct processor_costs ppc440_cost = {
1, /* streams */
};
+/* Instruction costs on PPC476 processors. */
+static const
+struct processor_costs ppc476_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (11), /* divsi */
+ COSTS_N_INSNS (11), /* divdi */
+ COSTS_N_INSNS (6), /* fp */
+ COSTS_N_INSNS (6), /* dmul */
+ COSTS_N_INSNS (19), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32, /* l1 cache line size */
+ 32, /* l1 cache */
+ 512, /* l2 cache */
+ 1, /* streams */
+};
+
/* Instruction costs on PPC601 processors. */
static const
struct processor_costs ppc601_cost = {
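The ppc476_cost entries above (and the ppca2_cost table added in the next hunk) are expressed with COSTS_N_INSNS, which scales an instruction count into the dimensionless units that rtx_cost uses when comparing alternative instruction sequences. In gcc/rtl.h it is simply:

#define COSTS_N_INSNS(N) ((N) * 4)

/* So COSTS_N_INSNS (11) == 44: a divsi on the 476 is weighted like
   eleven single-cycle instructions.  */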
@@ -797,6 +818,40 @@ struct processor_costs power7_cost = {
12, /* prefetch streams */
};
+/* Instruction costs on POWER A2 processors. */
+static const
+struct processor_costs ppca2_cost = {
+ COSTS_N_INSNS (16), /* mulsi */
+ COSTS_N_INSNS (16), /* mulsi_const */
+ COSTS_N_INSNS (16), /* mulsi_const9 */
+ COSTS_N_INSNS (16), /* muldi */
+ COSTS_N_INSNS (22), /* divsi */
+ COSTS_N_INSNS (28), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (59), /* sdiv */
+ COSTS_N_INSNS (72), /* ddiv */
+ 64, /* l1 cache line size */
+ 16, /* l1 cache */
+ 2048, /* l2 cache */
+ 16, /* prefetch streams */
+};
+
+
+/* Table that classifies rs6000 builtin functions (pure, const, etc.). */
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+#define RS6000_BUILTIN(NAME, TYPE) TYPE,
+#define RS6000_BUILTIN_EQUATE(NAME, VALUE)
+
+static const enum rs6000_btc builtin_classify[(int)RS6000_BUILTIN_COUNT] =
+{
+#include "rs6000-builtin.def"
+};
+
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+
static bool rs6000_function_ok_for_sibcall (tree, tree);
static const char *rs6000_invalid_within_doloop (const_rtx);
@@ -860,7 +915,7 @@ static void rs6000_elf_encode_section_info (tree, rtx, int)
ATTRIBUTE_UNUSED;
#endif
static bool rs6000_use_blocks_for_constant_p (enum machine_mode, const_rtx);
-static void rs6000_alloc_sdmode_stack_slot (void);
+static void rs6000_expand_to_rtl_hook (void);
static void rs6000_instantiate_decls (void);
#if TARGET_XCOFF
static void rs6000_xcoff_asm_output_anchor (rtx);
@@ -928,6 +983,8 @@ static bool rs6000_builtin_support_vector_misalignment (enum
static void def_builtin (int, const char *, tree, int);
static bool rs6000_vector_alignment_reachable (const_tree, bool);
static void rs6000_init_builtins (void);
+static tree rs6000_builtin_decl (unsigned, bool);
+
static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx rs6000_expand_ternop_builtin (enum insn_code, tree, rtx);
@@ -984,7 +1041,6 @@ static void rs6000_init_dwarf_reg_sizes_extra (tree);
static rtx rs6000_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
-static rtx rs6000_delegitimize_address (rtx);
static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static rtx rs6000_tls_get_addr (void);
static rtx rs6000_got_sym (void);
@@ -1314,6 +1370,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL rs6000_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
@@ -1445,14 +1503,11 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
-#undef TARGET_DELEGITIMIZE_ADDRESS
-#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
-
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
#undef TARGET_EXPAND_TO_RTL_HOOK
-#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
+#define TARGET_EXPAND_TO_RTL_HOOK rs6000_expand_to_rtl_hook
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
@@ -2125,6 +2180,12 @@ rs6000_override_options (const char *default_cpu)
POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
{"464fp", PROCESSOR_PPC440,
POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
+ {"476", PROCESSOR_PPC476,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_PPC_GFXOPT | MASK_MFCRF
+ | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_MULHW | MASK_DLMZB},
+ {"476fp", PROCESSOR_PPC476,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB
+ | MASK_FPRND | MASK_CMPB | MASK_MULHW | MASK_DLMZB},
{"505", PROCESSOR_MPCCORE, POWERPC_BASE_MASK},
{"601", PROCESSOR_PPC601,
MASK_POWER | POWERPC_BASE_MASK | MASK_MULTIPLE | MASK_STRING},
@@ -2149,6 +2210,9 @@ rs6000_override_options (const char *default_cpu)
/* 8548 has a dummy entry for now. */
{"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN
| MASK_ISEL},
+ {"a2", PROCESSOR_PPCA2,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_POPCNTB
+ | MASK_CMPB | MASK_NO_UPDATE },
{"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK},
{"e500mc", PROCESSOR_PPCE500MC, POWERPC_BASE_MASK | MASK_PPC_GFXOPT
@@ -2216,9 +2280,16 @@ rs6000_override_options (const char *default_cpu)
| MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC
| MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
| MASK_DLMZB | MASK_CMPB | MASK_MFPGPR | MASK_DFP
- | MASK_POPCNTD | MASK_VSX | MASK_ISEL)
+ | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE)
};
+ /* Numerous experiments show that IRA-based loop pressure
+ calculation works better for RTL loop invariant motion on targets
+ with enough (>= 32) registers. It is an expensive optimization,
+ so it is enabled only when optimizing for peak performance (-O3). */
+ if (optimize >= 3)
+ flag_ira_loop_pressure = 1;
+
/* Set the pointer size. */
if (TARGET_64BIT)
{
@@ -2495,6 +2566,7 @@ rs6000_override_options (const char *default_cpu)
&& rs6000_cpu != PROCESSOR_POWER5
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
@@ -2650,6 +2722,10 @@ rs6000_override_options (const char *default_cpu)
rs6000_cost = &ppc440_cost;
break;
+ case PROCESSOR_PPC476:
+ rs6000_cost = &ppc476_cost;
+ break;
+
case PROCESSOR_PPC601:
rs6000_cost = &ppc601_cost;
break;
@@ -2713,6 +2789,10 @@ rs6000_override_options (const char *default_cpu)
rs6000_cost = &power7_cost;
break;
+ case PROCESSOR_PPCA2:
+ rs6000_cost = &ppca2_cost;
+ break;
+
default:
gcc_unreachable ();
}
@@ -5128,33 +5208,6 @@ rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
return ret;
}
-/* If ORIG_X is a constant pool reference, return its known value,
- otherwise ORIG_X. */
-
-static rtx
-rs6000_delegitimize_address (rtx x)
-{
- rtx orig_x = delegitimize_mem_from_attrs (x);
-
- x = orig_x;
-
- if (!MEM_P (x))
- return orig_x;
-
- x = XEXP (x, 0);
-
- if (legitimate_constant_pool_address_p (x)
- && GET_CODE (XEXP (x, 1)) == CONST
- && GET_CODE (XEXP (XEXP (x, 1), 0)) == MINUS
- && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == SYMBOL_REF
- && constant_pool_expr_p (XEXP (XEXP (XEXP (x, 1), 0), 0))
- && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 1)) == SYMBOL_REF
- && toc_relative_expr_p (XEXP (XEXP (XEXP (x, 1), 0), 1)))
- return get_pool_constant (XEXP (XEXP (XEXP (x, 1), 0), 0));
-
- return orig_x;
-}
-
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
@@ -8465,13 +8518,54 @@ def_builtin (int mask, const char *name, tree type, int code)
{
if ((mask & target_flags) || TARGET_PAIRED_FLOAT)
{
+ tree t;
if (rs6000_builtin_decls[code])
fatal_error ("internal error: builtin function to %s already processed.",
name);
- rs6000_builtin_decls[code] =
+ rs6000_builtin_decls[code] = t =
add_builtin_function (name, type, code, BUILT_IN_MD,
NULL, NULL_TREE);
+
+ gcc_assert (code >= 0 && code < (int)RS6000_BUILTIN_COUNT);
+ switch (builtin_classify[code])
+ {
+ default:
+ gcc_unreachable ();
+
+ /* Assume the builtin can do anything. */
+ case RS6000_BTC_MISC:
+ break;
+
+ /* Const function: the result depends only on the inputs. */
+ case RS6000_BTC_CONST:
+ TREE_READONLY (t) = 1;
+ TREE_NOTHROW (t) = 1;
+ break;
+
+ /* Pure function: may also read global memory. */
+ case RS6000_BTC_PURE:
+ DECL_PURE_P (t) = 1;
+ TREE_NOTHROW (t) = 1;
+ break;
+
+ /* The builtin is a math function. If -frounding-math is in effect,
+ treat the function as not reading global memory, but as having
+ arbitrary side effects. If it is off, assume the function is a
+ const function. This mimics the ATTR_MATHFN_FPROUNDING attribute
+ in builtin-attrs.def that is used for the math functions. */
+ case RS6000_BTC_FP_PURE:
+ TREE_NOTHROW (t) = 1;
+ if (flag_rounding_math)
+ {
+ DECL_PURE_P (t) = 1;
+ DECL_IS_NOVOPS (t) = 1;
+ }
+ else
+ TREE_READONLY (t) = 1;
+ break;
+ }
}
}
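The classification switch above sets the same tree flags that __attribute__((const)) and __attribute__((pure)) set on user-written functions, so the optimizers treat the builtins identically. An illustration of what those flags license, using hypothetical functions rather than anything from the patch:

/* 'const': the result depends only on the arguments.  */
__attribute__((const)) int vcost (int a, int b);

/* 'pure': may also read (but never write) global memory.  */
__attribute__((pure)) int vlookup (int i);

int
twice (int x)
{
  /* Identical arguments, so CSE may delete the second call --
     exactly what TREE_READONLY / DECL_PURE_P permit for the
     builtins classified above.  */
  return vcost (x, 1) + vcost (x, 1);
}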
@@ -11148,6 +11242,17 @@ rs6000_init_builtins (void)
#endif
}
+/* Returns the rs6000 builtin decl for CODE. */
+
+static tree
+rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= RS6000_BUILTIN_COUNT)
+ return error_mark_node;
+
+ return rs6000_builtin_decls[code];
+}
+
/* Search through a set of builtins and enable the mask bits.
DESC is an array of builtins.
SIZE is the total number of builtins.
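rs6000_builtin_decl implements the TARGET_BUILTIN_DECL hook, which lets machine-independent code (LTO streaming in particular) map a builtin function code back to its declaration. A sketch of a call through the usual targetm plumbing; lookup_target_builtin is a hypothetical helper, not part of the patch:

static tree
lookup_target_builtin (unsigned fcode)
{
  /* The hook returns error_mark_node for an out-of-range code,
     matching the bounds check in rs6000_builtin_decl above.  */
  tree decl = targetm.builtin_decl (fcode, true);
  return decl == error_mark_node ? NULL_TREE : decl;
}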
@@ -13087,6 +13192,38 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
+static tree
+rs6000_check_vector_mode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
+{
+ /* Don't walk into types. */
+ if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
+ {
+ *walk_subtrees = 0;
+ return NULL_TREE;
+ }
+
+ switch (TREE_CODE (*tp))
+ {
+ case VAR_DECL:
+ case PARM_DECL:
+ case FIELD_DECL:
+ case RESULT_DECL:
+ case SSA_NAME:
+ case REAL_CST:
+ case INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
+ case VIEW_CONVERT_EXPR:
+ if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (*tp))))
+ return *tp;
+ break;
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
enum reload_reg_type {
GPR_REGISTER_TYPE,
VECTOR_REGISTER_TYPE,
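rs6000_check_vector_mode follows the standard walk_tree/walk_gimple_op callback contract: returning a non-NULL tree stops the walk and propagates that node to the caller, while setting *walk_subtrees to 0 prunes the subtree under *TP. A minimal callback of the same shape, purely for illustration:

static tree
find_first_var_decl (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  if (TYPE_P (*tp))
    {
      *walk_subtrees = 0;	/* Prune: do not walk into type nodes.  */
      return NULL_TREE;
    }
  if (TREE_CODE (*tp) == VAR_DECL)
    return *tp;			/* Non-NULL stops the walk at once.  */
  return NULL_TREE;		/* NULL means keep walking.  */
}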
@@ -13527,11 +13664,17 @@ rs6000_ira_cover_classes (void)
return (TARGET_VSX) ? cover_vsx : cover_pre_vsx;
}
-/* Allocate a 64-bit stack slot to be used for copying SDmode
- values through if this function has any SDmode references. */
+/* Scan the trees looking for certain types.
+
+ Allocate a 64-bit stack slot to be used for copying SDmode values through if
+ this function has any SDmode references.
+
+ If VSX, note whether any vector operation was done so we can set VRSAVE
+ to non-zero; even if only the floating point registers were used for
+ vectors, a non-zero VRSAVE tells the kernel to save the vector registers. */
static void
-rs6000_alloc_sdmode_stack_slot (void)
+rs6000_expand_to_rtl_hook (void)
{
tree t;
basic_block bb;
@@ -13539,6 +13682,24 @@ rs6000_alloc_sdmode_stack_slot (void)
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+ /* Check for vectors. */
+ if (TARGET_VSX)
+ {
+ FOR_EACH_BB (bb)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ if (walk_gimple_op (gsi_stmt (gsi), rs6000_check_vector_mode,
+ NULL))
+ {
+ cfun->machine->vsx_or_altivec_used_p = true;
+ goto found_vector;
+ }
+ }
+ found_vector:
+ ;
+ }
+
+ /* Check for SDmode being used. */
FOR_EACH_BB (bb)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
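The vector scan above escapes the doubly nested basic-block/statement loop with goto and a null-statement label (found_vector:;), the usual C idiom in the absence of a multi-level break. The same shape in isolation:

int
contains (int m[4][4], int key)
{
  int found = 0;
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      if (m[i][j] == key)
	{
	  found = 1;
	  goto done;	/* Leave both loops at once.  */
	}
 done:
  return found;
}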
@@ -16680,6 +16841,15 @@ compute_vrsave_mask (void)
if (df_regs_ever_live_p (i))
mask |= ALTIVEC_REG_BIT (i);
+ /* If VSX is used, we might have used a traditional floating point register
+ in a vector mode without using any altivec registers. However, the VRSAVE
+ register does not have room to indicate the floating point registers.
+ Modern kernels only look to see if the value is non-zero to determine if
+ they need to save the vector registers, so we just set an arbitrary
+ value if any vector type was used. */
+ if (mask == 0 && TARGET_VSX && cfun->machine->vsx_or_altivec_used_p)
+ mask = 0xFFF;
+
if (mask == 0)
return mask;
@@ -20052,8 +20222,10 @@ rs6000_output_function_epilogue (FILE *file,
use language_string.
C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
Java is 13. Objective-C is 14. Objective-C++ isn't assigned
- a number, so for now use 9. */
- if (! strcmp (language_string, "GNU C"))
+ a number, so for now use 9. LTO isn't assigned a number either,
+ so for now use 0. */
+ if (! strcmp (language_string, "GNU C")
+ || ! strcmp (language_string, "GNU GIMPLE"))
i = 0;
else if (! strcmp (language_string, "GNU F77")
|| ! strcmp (language_string, "GNU Fortran"))
@@ -21782,6 +21954,7 @@ rs6000_issue_rate (void)
case CPU_PPCE500MC:
return 2;
case CPU_RIOS2:
+ case CPU_PPC476:
case CPU_PPC604:
case CPU_PPC604E:
case CPU_PPC620:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 52d9a594be2..4b1ca3d635a 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -83,6 +83,12 @@
#define ASM_CPU_POWER7_SPEC "-mpower4 -maltivec"
#endif
+#ifdef HAVE_AS_DCI
+#define ASM_CPU_476_SPEC "-m476"
+#else
+#define ASM_CPU_476_SPEC "-mpower4"
+#endif
+
/* Common ASM definitions used by ASM_SPEC among the various targets for
handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to
provide the default assembler options if the user uses -mcpu=native, so if
@@ -107,6 +113,7 @@
%{mcpu=power6: %(asm_cpu_power6) -maltivec} \
%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \
%{mcpu=power7: %(asm_cpu_power7)} \
+%{mcpu=a2: -ma2} \
%{mcpu=powerpc: -mppc} \
%{mcpu=rios: -mpwr} \
%{mcpu=rios1: -mpwr} \
@@ -122,6 +129,8 @@
%{mcpu=440fp: -m440} \
%{mcpu=464: -m440} \
%{mcpu=464fp: -m440} \
+%{mcpu=476: %(asm_cpu_476)} \
+%{mcpu=476fp: %(asm_cpu_476)} \
%{mcpu=505: -mppc} \
%{mcpu=601: -m601} \
%{mcpu=602: -mppc} \
@@ -177,6 +186,7 @@
{ "asm_cpu_power5", ASM_CPU_POWER5_SPEC }, \
{ "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \
{ "asm_cpu_power7", ASM_CPU_POWER7_SPEC }, \
+ { "asm_cpu_476", ASM_CPU_476_SPEC }, \
SUBTARGET_EXTRA_SPECS
/* -mcpu=native handling only makes sense with compiler running on
@@ -317,6 +327,7 @@ enum processor_type
PROCESSOR_PPC403,
PROCESSOR_PPC405,
PROCESSOR_PPC440,
+ PROCESSOR_PPC476,
PROCESSOR_PPC601,
PROCESSOR_PPC603,
PROCESSOR_PPC604,
@@ -334,7 +345,8 @@ enum processor_type
PROCESSOR_POWER5,
PROCESSOR_POWER6,
PROCESSOR_POWER7,
- PROCESSOR_CELL
+ PROCESSOR_CELL,
+ PROCESSOR_PPCA2
};
/* FPU operations supported.
@@ -1021,10 +1033,12 @@ extern unsigned rs6000_pointer_size;
#define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
-#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
- ((TARGET_32BIT && TARGET_POWERPC64 \
- && (GET_MODE_SIZE (MODE) > 4) \
- && INT_REGNO_P (REGNO)) ? 1 : 0)
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
+ (((TARGET_32BIT && TARGET_POWERPC64 \
+ && (GET_MODE_SIZE (MODE) > 4) \
+ && INT_REGNO_P (REGNO)) ? 1 : 0) \
+ || (TARGET_VSX && FP_REGNO_P (REGNO) \
+ && GET_MODE_SIZE (MODE) > 8))
#define VSX_VECTOR_MODE(MODE) \
((MODE) == V4SFmode \
@@ -2412,964 +2426,35 @@ extern int optimize;
extern int flag_expensive_optimizations;
extern int frame_pointer_needed;
+/* Classification of the builtin functions to properly set the declaration tree
+ flags. */
+enum rs6000_btc
+{
+ RS6000_BTC_MISC, /* Assume the builtin can do anything. */
+ RS6000_BTC_CONST, /* Builtin is a 'const' function. */
+ RS6000_BTC_PURE, /* Builtin is a 'pure' function. */
+ RS6000_BTC_FP_PURE /* Builtin is 'pure' if -frounding-math is on. */
+};
+
+/* Convenience macros to document the instruction type. */
+#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches memory */
+#define RS6000_BTC_SAT RS6000_BTC_MISC /* VMX saturate sets VSCR register */
+
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+#define RS6000_BUILTIN(NAME, TYPE) NAME,
+#define RS6000_BUILTIN_EQUATE(NAME, VALUE) NAME = VALUE,
+
enum rs6000_builtins
{
- /* AltiVec builtins. */
- ALTIVEC_BUILTIN_ST_INTERNAL_4si,
- ALTIVEC_BUILTIN_LD_INTERNAL_4si,
- ALTIVEC_BUILTIN_ST_INTERNAL_8hi,
- ALTIVEC_BUILTIN_LD_INTERNAL_8hi,
- ALTIVEC_BUILTIN_ST_INTERNAL_16qi,
- ALTIVEC_BUILTIN_LD_INTERNAL_16qi,
- ALTIVEC_BUILTIN_ST_INTERNAL_4sf,
- ALTIVEC_BUILTIN_LD_INTERNAL_4sf,
- ALTIVEC_BUILTIN_VADDUBM,
- ALTIVEC_BUILTIN_VADDUHM,
- ALTIVEC_BUILTIN_VADDUWM,
- ALTIVEC_BUILTIN_VADDFP,
- ALTIVEC_BUILTIN_VADDCUW,
- ALTIVEC_BUILTIN_VADDUBS,
- ALTIVEC_BUILTIN_VADDSBS,
- ALTIVEC_BUILTIN_VADDUHS,
- ALTIVEC_BUILTIN_VADDSHS,
- ALTIVEC_BUILTIN_VADDUWS,
- ALTIVEC_BUILTIN_VADDSWS,
- ALTIVEC_BUILTIN_VAND,
- ALTIVEC_BUILTIN_VANDC,
- ALTIVEC_BUILTIN_VAVGUB,
- ALTIVEC_BUILTIN_VAVGSB,
- ALTIVEC_BUILTIN_VAVGUH,
- ALTIVEC_BUILTIN_VAVGSH,
- ALTIVEC_BUILTIN_VAVGUW,
- ALTIVEC_BUILTIN_VAVGSW,
- ALTIVEC_BUILTIN_VCFUX,
- ALTIVEC_BUILTIN_VCFSX,
- ALTIVEC_BUILTIN_VCTSXS,
- ALTIVEC_BUILTIN_VCTUXS,
- ALTIVEC_BUILTIN_VCMPBFP,
- ALTIVEC_BUILTIN_VCMPEQUB,
- ALTIVEC_BUILTIN_VCMPEQUH,
- ALTIVEC_BUILTIN_VCMPEQUW,
- ALTIVEC_BUILTIN_VCMPEQFP,
- ALTIVEC_BUILTIN_VCMPGEFP,
- ALTIVEC_BUILTIN_VCMPGTUB,
- ALTIVEC_BUILTIN_VCMPGTSB,
- ALTIVEC_BUILTIN_VCMPGTUH,
- ALTIVEC_BUILTIN_VCMPGTSH,
- ALTIVEC_BUILTIN_VCMPGTUW,
- ALTIVEC_BUILTIN_VCMPGTSW,
- ALTIVEC_BUILTIN_VCMPGTFP,
- ALTIVEC_BUILTIN_VEXPTEFP,
- ALTIVEC_BUILTIN_VLOGEFP,
- ALTIVEC_BUILTIN_VMADDFP,
- ALTIVEC_BUILTIN_VMAXUB,
- ALTIVEC_BUILTIN_VMAXSB,
- ALTIVEC_BUILTIN_VMAXUH,
- ALTIVEC_BUILTIN_VMAXSH,
- ALTIVEC_BUILTIN_VMAXUW,
- ALTIVEC_BUILTIN_VMAXSW,
- ALTIVEC_BUILTIN_VMAXFP,
- ALTIVEC_BUILTIN_VMHADDSHS,
- ALTIVEC_BUILTIN_VMHRADDSHS,
- ALTIVEC_BUILTIN_VMLADDUHM,
- ALTIVEC_BUILTIN_VMRGHB,
- ALTIVEC_BUILTIN_VMRGHH,
- ALTIVEC_BUILTIN_VMRGHW,
- ALTIVEC_BUILTIN_VMRGLB,
- ALTIVEC_BUILTIN_VMRGLH,
- ALTIVEC_BUILTIN_VMRGLW,
- ALTIVEC_BUILTIN_VMSUMUBM,
- ALTIVEC_BUILTIN_VMSUMMBM,
- ALTIVEC_BUILTIN_VMSUMUHM,
- ALTIVEC_BUILTIN_VMSUMSHM,
- ALTIVEC_BUILTIN_VMSUMUHS,
- ALTIVEC_BUILTIN_VMSUMSHS,
- ALTIVEC_BUILTIN_VMINUB,
- ALTIVEC_BUILTIN_VMINSB,
- ALTIVEC_BUILTIN_VMINUH,
- ALTIVEC_BUILTIN_VMINSH,
- ALTIVEC_BUILTIN_VMINUW,
- ALTIVEC_BUILTIN_VMINSW,
- ALTIVEC_BUILTIN_VMINFP,
- ALTIVEC_BUILTIN_VMULEUB,
- ALTIVEC_BUILTIN_VMULEUB_UNS,
- ALTIVEC_BUILTIN_VMULESB,
- ALTIVEC_BUILTIN_VMULEUH,
- ALTIVEC_BUILTIN_VMULEUH_UNS,
- ALTIVEC_BUILTIN_VMULESH,
- ALTIVEC_BUILTIN_VMULOUB,
- ALTIVEC_BUILTIN_VMULOUB_UNS,
- ALTIVEC_BUILTIN_VMULOSB,
- ALTIVEC_BUILTIN_VMULOUH,
- ALTIVEC_BUILTIN_VMULOUH_UNS,
- ALTIVEC_BUILTIN_VMULOSH,
- ALTIVEC_BUILTIN_VNMSUBFP,
- ALTIVEC_BUILTIN_VNOR,
- ALTIVEC_BUILTIN_VOR,
- ALTIVEC_BUILTIN_VSEL_2DF, /* needed for VSX */
- ALTIVEC_BUILTIN_VSEL_2DI, /* needed for VSX */
- ALTIVEC_BUILTIN_VSEL_4SI,
- ALTIVEC_BUILTIN_VSEL_4SF,
- ALTIVEC_BUILTIN_VSEL_8HI,
- ALTIVEC_BUILTIN_VSEL_16QI,
- ALTIVEC_BUILTIN_VSEL_2DI_UNS,
- ALTIVEC_BUILTIN_VSEL_4SI_UNS,
- ALTIVEC_BUILTIN_VSEL_8HI_UNS,
- ALTIVEC_BUILTIN_VSEL_16QI_UNS,
- ALTIVEC_BUILTIN_VPERM_2DF, /* needed for VSX */
- ALTIVEC_BUILTIN_VPERM_2DI, /* needed for VSX */
- ALTIVEC_BUILTIN_VPERM_4SI,
- ALTIVEC_BUILTIN_VPERM_4SF,
- ALTIVEC_BUILTIN_VPERM_8HI,
- ALTIVEC_BUILTIN_VPERM_16QI,
- ALTIVEC_BUILTIN_VPERM_2DI_UNS,
- ALTIVEC_BUILTIN_VPERM_4SI_UNS,
- ALTIVEC_BUILTIN_VPERM_8HI_UNS,
- ALTIVEC_BUILTIN_VPERM_16QI_UNS,
- ALTIVEC_BUILTIN_VPKUHUM,
- ALTIVEC_BUILTIN_VPKUWUM,
- ALTIVEC_BUILTIN_VPKPX,
- ALTIVEC_BUILTIN_VPKUHSS,
- ALTIVEC_BUILTIN_VPKSHSS,
- ALTIVEC_BUILTIN_VPKUWSS,
- ALTIVEC_BUILTIN_VPKSWSS,
- ALTIVEC_BUILTIN_VPKUHUS,
- ALTIVEC_BUILTIN_VPKSHUS,
- ALTIVEC_BUILTIN_VPKUWUS,
- ALTIVEC_BUILTIN_VPKSWUS,
- ALTIVEC_BUILTIN_VREFP,
- ALTIVEC_BUILTIN_VRFIM,
- ALTIVEC_BUILTIN_VRFIN,
- ALTIVEC_BUILTIN_VRFIP,
- ALTIVEC_BUILTIN_VRFIZ,
- ALTIVEC_BUILTIN_VRLB,
- ALTIVEC_BUILTIN_VRLH,
- ALTIVEC_BUILTIN_VRLW,
- ALTIVEC_BUILTIN_VRSQRTEFP,
- ALTIVEC_BUILTIN_VSLB,
- ALTIVEC_BUILTIN_VSLH,
- ALTIVEC_BUILTIN_VSLW,
- ALTIVEC_BUILTIN_VSL,
- ALTIVEC_BUILTIN_VSLO,
- ALTIVEC_BUILTIN_VSPLTB,
- ALTIVEC_BUILTIN_VSPLTH,
- ALTIVEC_BUILTIN_VSPLTW,
- ALTIVEC_BUILTIN_VSPLTISB,
- ALTIVEC_BUILTIN_VSPLTISH,
- ALTIVEC_BUILTIN_VSPLTISW,
- ALTIVEC_BUILTIN_VSRB,
- ALTIVEC_BUILTIN_VSRH,
- ALTIVEC_BUILTIN_VSRW,
- ALTIVEC_BUILTIN_VSRAB,
- ALTIVEC_BUILTIN_VSRAH,
- ALTIVEC_BUILTIN_VSRAW,
- ALTIVEC_BUILTIN_VSR,
- ALTIVEC_BUILTIN_VSRO,
- ALTIVEC_BUILTIN_VSUBUBM,
- ALTIVEC_BUILTIN_VSUBUHM,
- ALTIVEC_BUILTIN_VSUBUWM,
- ALTIVEC_BUILTIN_VSUBFP,
- ALTIVEC_BUILTIN_VSUBCUW,
- ALTIVEC_BUILTIN_VSUBUBS,
- ALTIVEC_BUILTIN_VSUBSBS,
- ALTIVEC_BUILTIN_VSUBUHS,
- ALTIVEC_BUILTIN_VSUBSHS,
- ALTIVEC_BUILTIN_VSUBUWS,
- ALTIVEC_BUILTIN_VSUBSWS,
- ALTIVEC_BUILTIN_VSUM4UBS,
- ALTIVEC_BUILTIN_VSUM4SBS,
- ALTIVEC_BUILTIN_VSUM4SHS,
- ALTIVEC_BUILTIN_VSUM2SWS,
- ALTIVEC_BUILTIN_VSUMSWS,
- ALTIVEC_BUILTIN_VXOR,
- ALTIVEC_BUILTIN_VSLDOI_16QI,
- ALTIVEC_BUILTIN_VSLDOI_8HI,
- ALTIVEC_BUILTIN_VSLDOI_4SI,
- ALTIVEC_BUILTIN_VSLDOI_4SF,
- ALTIVEC_BUILTIN_VUPKHSB,
- ALTIVEC_BUILTIN_VUPKHPX,
- ALTIVEC_BUILTIN_VUPKHSH,
- ALTIVEC_BUILTIN_VUPKLSB,
- ALTIVEC_BUILTIN_VUPKLPX,
- ALTIVEC_BUILTIN_VUPKLSH,
- ALTIVEC_BUILTIN_MTVSCR,
- ALTIVEC_BUILTIN_MFVSCR,
- ALTIVEC_BUILTIN_DSSALL,
- ALTIVEC_BUILTIN_DSS,
- ALTIVEC_BUILTIN_LVSL,
- ALTIVEC_BUILTIN_LVSR,
- ALTIVEC_BUILTIN_DSTT,
- ALTIVEC_BUILTIN_DSTST,
- ALTIVEC_BUILTIN_DSTSTT,
- ALTIVEC_BUILTIN_DST,
- ALTIVEC_BUILTIN_LVEBX,
- ALTIVEC_BUILTIN_LVEHX,
- ALTIVEC_BUILTIN_LVEWX,
- ALTIVEC_BUILTIN_LVXL,
- ALTIVEC_BUILTIN_LVX,
- ALTIVEC_BUILTIN_STVX,
- ALTIVEC_BUILTIN_LVLX,
- ALTIVEC_BUILTIN_LVLXL,
- ALTIVEC_BUILTIN_LVRX,
- ALTIVEC_BUILTIN_LVRXL,
- ALTIVEC_BUILTIN_STVEBX,
- ALTIVEC_BUILTIN_STVEHX,
- ALTIVEC_BUILTIN_STVEWX,
- ALTIVEC_BUILTIN_STVXL,
- ALTIVEC_BUILTIN_STVLX,
- ALTIVEC_BUILTIN_STVLXL,
- ALTIVEC_BUILTIN_STVRX,
- ALTIVEC_BUILTIN_STVRXL,
- ALTIVEC_BUILTIN_VCMPBFP_P,
- ALTIVEC_BUILTIN_VCMPEQFP_P,
- ALTIVEC_BUILTIN_VCMPEQUB_P,
- ALTIVEC_BUILTIN_VCMPEQUH_P,
- ALTIVEC_BUILTIN_VCMPEQUW_P,
- ALTIVEC_BUILTIN_VCMPGEFP_P,
- ALTIVEC_BUILTIN_VCMPGTFP_P,
- ALTIVEC_BUILTIN_VCMPGTSB_P,
- ALTIVEC_BUILTIN_VCMPGTSH_P,
- ALTIVEC_BUILTIN_VCMPGTSW_P,
- ALTIVEC_BUILTIN_VCMPGTUB_P,
- ALTIVEC_BUILTIN_VCMPGTUH_P,
- ALTIVEC_BUILTIN_VCMPGTUW_P,
- ALTIVEC_BUILTIN_ABSS_V4SI,
- ALTIVEC_BUILTIN_ABSS_V8HI,
- ALTIVEC_BUILTIN_ABSS_V16QI,
- ALTIVEC_BUILTIN_ABS_V4SI,
- ALTIVEC_BUILTIN_ABS_V4SF,
- ALTIVEC_BUILTIN_ABS_V8HI,
- ALTIVEC_BUILTIN_ABS_V16QI,
- ALTIVEC_BUILTIN_MASK_FOR_LOAD,
- ALTIVEC_BUILTIN_MASK_FOR_STORE,
- ALTIVEC_BUILTIN_VEC_INIT_V4SI,
- ALTIVEC_BUILTIN_VEC_INIT_V8HI,
- ALTIVEC_BUILTIN_VEC_INIT_V16QI,
- ALTIVEC_BUILTIN_VEC_INIT_V4SF,
- ALTIVEC_BUILTIN_VEC_SET_V4SI,
- ALTIVEC_BUILTIN_VEC_SET_V8HI,
- ALTIVEC_BUILTIN_VEC_SET_V16QI,
- ALTIVEC_BUILTIN_VEC_SET_V4SF,
- ALTIVEC_BUILTIN_VEC_EXT_V4SI,
- ALTIVEC_BUILTIN_VEC_EXT_V8HI,
- ALTIVEC_BUILTIN_VEC_EXT_V16QI,
- ALTIVEC_BUILTIN_VEC_EXT_V4SF,
- ALTIVEC_BUILTIN_COPYSIGN_V4SF,
-
- /* Altivec overloaded builtins. */
- ALTIVEC_BUILTIN_VCMPEQ_P,
- ALTIVEC_BUILTIN_OVERLOADED_FIRST = ALTIVEC_BUILTIN_VCMPEQ_P,
- ALTIVEC_BUILTIN_VCMPGT_P,
- ALTIVEC_BUILTIN_VCMPGE_P,
- ALTIVEC_BUILTIN_VEC_ABS,
- ALTIVEC_BUILTIN_VEC_ABSS,
- ALTIVEC_BUILTIN_VEC_ADD,
- ALTIVEC_BUILTIN_VEC_ADDC,
- ALTIVEC_BUILTIN_VEC_ADDS,
- ALTIVEC_BUILTIN_VEC_AND,
- ALTIVEC_BUILTIN_VEC_ANDC,
- ALTIVEC_BUILTIN_VEC_AVG,
- ALTIVEC_BUILTIN_VEC_EXTRACT,
- ALTIVEC_BUILTIN_VEC_CEIL,
- ALTIVEC_BUILTIN_VEC_CMPB,
- ALTIVEC_BUILTIN_VEC_CMPEQ,
- ALTIVEC_BUILTIN_VEC_CMPEQUB,
- ALTIVEC_BUILTIN_VEC_CMPEQUH,
- ALTIVEC_BUILTIN_VEC_CMPEQUW,
- ALTIVEC_BUILTIN_VEC_CMPGE,
- ALTIVEC_BUILTIN_VEC_CMPGT,
- ALTIVEC_BUILTIN_VEC_CMPLE,
- ALTIVEC_BUILTIN_VEC_CMPLT,
- ALTIVEC_BUILTIN_VEC_COPYSIGN,
- ALTIVEC_BUILTIN_VEC_CTF,
- ALTIVEC_BUILTIN_VEC_CTS,
- ALTIVEC_BUILTIN_VEC_CTU,
- ALTIVEC_BUILTIN_VEC_DST,
- ALTIVEC_BUILTIN_VEC_DSTST,
- ALTIVEC_BUILTIN_VEC_DSTSTT,
- ALTIVEC_BUILTIN_VEC_DSTT,
- ALTIVEC_BUILTIN_VEC_EXPTE,
- ALTIVEC_BUILTIN_VEC_FLOOR,
- ALTIVEC_BUILTIN_VEC_LD,
- ALTIVEC_BUILTIN_VEC_LDE,
- ALTIVEC_BUILTIN_VEC_LDL,
- ALTIVEC_BUILTIN_VEC_LOGE,
- ALTIVEC_BUILTIN_VEC_LVEBX,
- ALTIVEC_BUILTIN_VEC_LVEHX,
- ALTIVEC_BUILTIN_VEC_LVEWX,
- ALTIVEC_BUILTIN_VEC_LVLX,
- ALTIVEC_BUILTIN_VEC_LVLXL,
- ALTIVEC_BUILTIN_VEC_LVRX,
- ALTIVEC_BUILTIN_VEC_LVRXL,
- ALTIVEC_BUILTIN_VEC_LVSL,
- ALTIVEC_BUILTIN_VEC_LVSR,
- ALTIVEC_BUILTIN_VEC_MADD,
- ALTIVEC_BUILTIN_VEC_MADDS,
- ALTIVEC_BUILTIN_VEC_MAX,
- ALTIVEC_BUILTIN_VEC_MERGEH,
- ALTIVEC_BUILTIN_VEC_MERGEL,
- ALTIVEC_BUILTIN_VEC_MIN,
- ALTIVEC_BUILTIN_VEC_MLADD,
- ALTIVEC_BUILTIN_VEC_MPERM,
- ALTIVEC_BUILTIN_VEC_MRADDS,
- ALTIVEC_BUILTIN_VEC_MRGHB,
- ALTIVEC_BUILTIN_VEC_MRGHH,
- ALTIVEC_BUILTIN_VEC_MRGHW,
- ALTIVEC_BUILTIN_VEC_MRGLB,
- ALTIVEC_BUILTIN_VEC_MRGLH,
- ALTIVEC_BUILTIN_VEC_MRGLW,
- ALTIVEC_BUILTIN_VEC_MSUM,
- ALTIVEC_BUILTIN_VEC_MSUMS,
- ALTIVEC_BUILTIN_VEC_MTVSCR,
- ALTIVEC_BUILTIN_VEC_MULE,
- ALTIVEC_BUILTIN_VEC_MULO,
- ALTIVEC_BUILTIN_VEC_NEARBYINT,
- ALTIVEC_BUILTIN_VEC_NMSUB,
- ALTIVEC_BUILTIN_VEC_NOR,
- ALTIVEC_BUILTIN_VEC_OR,
- ALTIVEC_BUILTIN_VEC_PACK,
- ALTIVEC_BUILTIN_VEC_PACKPX,
- ALTIVEC_BUILTIN_VEC_PACKS,
- ALTIVEC_BUILTIN_VEC_PACKSU,
- ALTIVEC_BUILTIN_VEC_PERM,
- ALTIVEC_BUILTIN_VEC_RE,
- ALTIVEC_BUILTIN_VEC_RL,
- ALTIVEC_BUILTIN_VEC_RINT,
- ALTIVEC_BUILTIN_VEC_ROUND,
- ALTIVEC_BUILTIN_VEC_RSQRTE,
- ALTIVEC_BUILTIN_VEC_SEL,
- ALTIVEC_BUILTIN_VEC_SL,
- ALTIVEC_BUILTIN_VEC_SLD,
- ALTIVEC_BUILTIN_VEC_SLL,
- ALTIVEC_BUILTIN_VEC_SLO,
- ALTIVEC_BUILTIN_VEC_SPLAT,
- ALTIVEC_BUILTIN_VEC_SPLAT_S16,
- ALTIVEC_BUILTIN_VEC_SPLAT_S32,
- ALTIVEC_BUILTIN_VEC_SPLAT_S8,
- ALTIVEC_BUILTIN_VEC_SPLAT_U16,
- ALTIVEC_BUILTIN_VEC_SPLAT_U32,
- ALTIVEC_BUILTIN_VEC_SPLAT_U8,
- ALTIVEC_BUILTIN_VEC_SPLTB,
- ALTIVEC_BUILTIN_VEC_SPLTH,
- ALTIVEC_BUILTIN_VEC_SPLTW,
- ALTIVEC_BUILTIN_VEC_SQRT,
- ALTIVEC_BUILTIN_VEC_SR,
- ALTIVEC_BUILTIN_VEC_SRA,
- ALTIVEC_BUILTIN_VEC_SRL,
- ALTIVEC_BUILTIN_VEC_SRO,
- ALTIVEC_BUILTIN_VEC_ST,
- ALTIVEC_BUILTIN_VEC_STE,
- ALTIVEC_BUILTIN_VEC_STL,
- ALTIVEC_BUILTIN_VEC_STVEBX,
- ALTIVEC_BUILTIN_VEC_STVEHX,
- ALTIVEC_BUILTIN_VEC_STVEWX,
- ALTIVEC_BUILTIN_VEC_STVLX,
- ALTIVEC_BUILTIN_VEC_STVLXL,
- ALTIVEC_BUILTIN_VEC_STVRX,
- ALTIVEC_BUILTIN_VEC_STVRXL,
- ALTIVEC_BUILTIN_VEC_SUB,
- ALTIVEC_BUILTIN_VEC_SUBC,
- ALTIVEC_BUILTIN_VEC_SUBS,
- ALTIVEC_BUILTIN_VEC_SUM2S,
- ALTIVEC_BUILTIN_VEC_SUM4S,
- ALTIVEC_BUILTIN_VEC_SUMS,
- ALTIVEC_BUILTIN_VEC_TRUNC,
- ALTIVEC_BUILTIN_VEC_UNPACKH,
- ALTIVEC_BUILTIN_VEC_UNPACKL,
- ALTIVEC_BUILTIN_VEC_VADDFP,
- ALTIVEC_BUILTIN_VEC_VADDSBS,
- ALTIVEC_BUILTIN_VEC_VADDSHS,
- ALTIVEC_BUILTIN_VEC_VADDSWS,
- ALTIVEC_BUILTIN_VEC_VADDUBM,
- ALTIVEC_BUILTIN_VEC_VADDUBS,
- ALTIVEC_BUILTIN_VEC_VADDUHM,
- ALTIVEC_BUILTIN_VEC_VADDUHS,
- ALTIVEC_BUILTIN_VEC_VADDUWM,
- ALTIVEC_BUILTIN_VEC_VADDUWS,
- ALTIVEC_BUILTIN_VEC_VAVGSB,
- ALTIVEC_BUILTIN_VEC_VAVGSH,
- ALTIVEC_BUILTIN_VEC_VAVGSW,
- ALTIVEC_BUILTIN_VEC_VAVGUB,
- ALTIVEC_BUILTIN_VEC_VAVGUH,
- ALTIVEC_BUILTIN_VEC_VAVGUW,
- ALTIVEC_BUILTIN_VEC_VCFSX,
- ALTIVEC_BUILTIN_VEC_VCFUX,
- ALTIVEC_BUILTIN_VEC_VCMPEQFP,
- ALTIVEC_BUILTIN_VEC_VCMPEQUB,
- ALTIVEC_BUILTIN_VEC_VCMPEQUH,
- ALTIVEC_BUILTIN_VEC_VCMPEQUW,
- ALTIVEC_BUILTIN_VEC_VCMPGTFP,
- ALTIVEC_BUILTIN_VEC_VCMPGTSB,
- ALTIVEC_BUILTIN_VEC_VCMPGTSH,
- ALTIVEC_BUILTIN_VEC_VCMPGTSW,
- ALTIVEC_BUILTIN_VEC_VCMPGTUB,
- ALTIVEC_BUILTIN_VEC_VCMPGTUH,
- ALTIVEC_BUILTIN_VEC_VCMPGTUW,
- ALTIVEC_BUILTIN_VEC_VMAXFP,
- ALTIVEC_BUILTIN_VEC_VMAXSB,
- ALTIVEC_BUILTIN_VEC_VMAXSH,
- ALTIVEC_BUILTIN_VEC_VMAXSW,
- ALTIVEC_BUILTIN_VEC_VMAXUB,
- ALTIVEC_BUILTIN_VEC_VMAXUH,
- ALTIVEC_BUILTIN_VEC_VMAXUW,
- ALTIVEC_BUILTIN_VEC_VMINFP,
- ALTIVEC_BUILTIN_VEC_VMINSB,
- ALTIVEC_BUILTIN_VEC_VMINSH,
- ALTIVEC_BUILTIN_VEC_VMINSW,
- ALTIVEC_BUILTIN_VEC_VMINUB,
- ALTIVEC_BUILTIN_VEC_VMINUH,
- ALTIVEC_BUILTIN_VEC_VMINUW,
- ALTIVEC_BUILTIN_VEC_VMRGHB,
- ALTIVEC_BUILTIN_VEC_VMRGHH,
- ALTIVEC_BUILTIN_VEC_VMRGHW,
- ALTIVEC_BUILTIN_VEC_VMRGLB,
- ALTIVEC_BUILTIN_VEC_VMRGLH,
- ALTIVEC_BUILTIN_VEC_VMRGLW,
- ALTIVEC_BUILTIN_VEC_VMSUMMBM,
- ALTIVEC_BUILTIN_VEC_VMSUMSHM,
- ALTIVEC_BUILTIN_VEC_VMSUMSHS,
- ALTIVEC_BUILTIN_VEC_VMSUMUBM,
- ALTIVEC_BUILTIN_VEC_VMSUMUHM,
- ALTIVEC_BUILTIN_VEC_VMSUMUHS,
- ALTIVEC_BUILTIN_VEC_VMULESB,
- ALTIVEC_BUILTIN_VEC_VMULESH,
- ALTIVEC_BUILTIN_VEC_VMULEUB,
- ALTIVEC_BUILTIN_VEC_VMULEUH,
- ALTIVEC_BUILTIN_VEC_VMULOSB,
- ALTIVEC_BUILTIN_VEC_VMULOSH,
- ALTIVEC_BUILTIN_VEC_VMULOUB,
- ALTIVEC_BUILTIN_VEC_VMULOUH,
- ALTIVEC_BUILTIN_VEC_VPKSHSS,
- ALTIVEC_BUILTIN_VEC_VPKSHUS,
- ALTIVEC_BUILTIN_VEC_VPKSWSS,
- ALTIVEC_BUILTIN_VEC_VPKSWUS,
- ALTIVEC_BUILTIN_VEC_VPKUHUM,
- ALTIVEC_BUILTIN_VEC_VPKUHUS,
- ALTIVEC_BUILTIN_VEC_VPKUWUM,
- ALTIVEC_BUILTIN_VEC_VPKUWUS,
- ALTIVEC_BUILTIN_VEC_VRLB,
- ALTIVEC_BUILTIN_VEC_VRLH,
- ALTIVEC_BUILTIN_VEC_VRLW,
- ALTIVEC_BUILTIN_VEC_VSLB,
- ALTIVEC_BUILTIN_VEC_VSLH,
- ALTIVEC_BUILTIN_VEC_VSLW,
- ALTIVEC_BUILTIN_VEC_VSPLTB,
- ALTIVEC_BUILTIN_VEC_VSPLTH,
- ALTIVEC_BUILTIN_VEC_VSPLTW,
- ALTIVEC_BUILTIN_VEC_VSRAB,
- ALTIVEC_BUILTIN_VEC_VSRAH,
- ALTIVEC_BUILTIN_VEC_VSRAW,
- ALTIVEC_BUILTIN_VEC_VSRB,
- ALTIVEC_BUILTIN_VEC_VSRH,
- ALTIVEC_BUILTIN_VEC_VSRW,
- ALTIVEC_BUILTIN_VEC_VSUBFP,
- ALTIVEC_BUILTIN_VEC_VSUBSBS,
- ALTIVEC_BUILTIN_VEC_VSUBSHS,
- ALTIVEC_BUILTIN_VEC_VSUBSWS,
- ALTIVEC_BUILTIN_VEC_VSUBUBM,
- ALTIVEC_BUILTIN_VEC_VSUBUBS,
- ALTIVEC_BUILTIN_VEC_VSUBUHM,
- ALTIVEC_BUILTIN_VEC_VSUBUHS,
- ALTIVEC_BUILTIN_VEC_VSUBUWM,
- ALTIVEC_BUILTIN_VEC_VSUBUWS,
- ALTIVEC_BUILTIN_VEC_VSUM4SBS,
- ALTIVEC_BUILTIN_VEC_VSUM4SHS,
- ALTIVEC_BUILTIN_VEC_VSUM4UBS,
- ALTIVEC_BUILTIN_VEC_VUPKHPX,
- ALTIVEC_BUILTIN_VEC_VUPKHSB,
- ALTIVEC_BUILTIN_VEC_VUPKHSH,
- ALTIVEC_BUILTIN_VEC_VUPKLPX,
- ALTIVEC_BUILTIN_VEC_VUPKLSB,
- ALTIVEC_BUILTIN_VEC_VUPKLSH,
- ALTIVEC_BUILTIN_VEC_XOR,
- ALTIVEC_BUILTIN_VEC_STEP,
- ALTIVEC_BUILTIN_VEC_PROMOTE,
- ALTIVEC_BUILTIN_VEC_INSERT,
- ALTIVEC_BUILTIN_VEC_SPLATS,
- ALTIVEC_BUILTIN_OVERLOADED_LAST = ALTIVEC_BUILTIN_VEC_SPLATS,
-
- /* SPE builtins. */
- SPE_BUILTIN_EVADDW,
- SPE_BUILTIN_EVAND,
- SPE_BUILTIN_EVANDC,
- SPE_BUILTIN_EVDIVWS,
- SPE_BUILTIN_EVDIVWU,
- SPE_BUILTIN_EVEQV,
- SPE_BUILTIN_EVFSADD,
- SPE_BUILTIN_EVFSDIV,
- SPE_BUILTIN_EVFSMUL,
- SPE_BUILTIN_EVFSSUB,
- SPE_BUILTIN_EVLDDX,
- SPE_BUILTIN_EVLDHX,
- SPE_BUILTIN_EVLDWX,
- SPE_BUILTIN_EVLHHESPLATX,
- SPE_BUILTIN_EVLHHOSSPLATX,
- SPE_BUILTIN_EVLHHOUSPLATX,
- SPE_BUILTIN_EVLWHEX,
- SPE_BUILTIN_EVLWHOSX,
- SPE_BUILTIN_EVLWHOUX,
- SPE_BUILTIN_EVLWHSPLATX,
- SPE_BUILTIN_EVLWWSPLATX,
- SPE_BUILTIN_EVMERGEHI,
- SPE_BUILTIN_EVMERGEHILO,
- SPE_BUILTIN_EVMERGELO,
- SPE_BUILTIN_EVMERGELOHI,
- SPE_BUILTIN_EVMHEGSMFAA,
- SPE_BUILTIN_EVMHEGSMFAN,
- SPE_BUILTIN_EVMHEGSMIAA,
- SPE_BUILTIN_EVMHEGSMIAN,
- SPE_BUILTIN_EVMHEGUMIAA,
- SPE_BUILTIN_EVMHEGUMIAN,
- SPE_BUILTIN_EVMHESMF,
- SPE_BUILTIN_EVMHESMFA,
- SPE_BUILTIN_EVMHESMFAAW,
- SPE_BUILTIN_EVMHESMFANW,
- SPE_BUILTIN_EVMHESMI,
- SPE_BUILTIN_EVMHESMIA,
- SPE_BUILTIN_EVMHESMIAAW,
- SPE_BUILTIN_EVMHESMIANW,
- SPE_BUILTIN_EVMHESSF,
- SPE_BUILTIN_EVMHESSFA,
- SPE_BUILTIN_EVMHESSFAAW,
- SPE_BUILTIN_EVMHESSFANW,
- SPE_BUILTIN_EVMHESSIAAW,
- SPE_BUILTIN_EVMHESSIANW,
- SPE_BUILTIN_EVMHEUMI,
- SPE_BUILTIN_EVMHEUMIA,
- SPE_BUILTIN_EVMHEUMIAAW,
- SPE_BUILTIN_EVMHEUMIANW,
- SPE_BUILTIN_EVMHEUSIAAW,
- SPE_BUILTIN_EVMHEUSIANW,
- SPE_BUILTIN_EVMHOGSMFAA,
- SPE_BUILTIN_EVMHOGSMFAN,
- SPE_BUILTIN_EVMHOGSMIAA,
- SPE_BUILTIN_EVMHOGSMIAN,
- SPE_BUILTIN_EVMHOGUMIAA,
- SPE_BUILTIN_EVMHOGUMIAN,
- SPE_BUILTIN_EVMHOSMF,
- SPE_BUILTIN_EVMHOSMFA,
- SPE_BUILTIN_EVMHOSMFAAW,
- SPE_BUILTIN_EVMHOSMFANW,
- SPE_BUILTIN_EVMHOSMI,
- SPE_BUILTIN_EVMHOSMIA,
- SPE_BUILTIN_EVMHOSMIAAW,
- SPE_BUILTIN_EVMHOSMIANW,
- SPE_BUILTIN_EVMHOSSF,
- SPE_BUILTIN_EVMHOSSFA,
- SPE_BUILTIN_EVMHOSSFAAW,
- SPE_BUILTIN_EVMHOSSFANW,
- SPE_BUILTIN_EVMHOSSIAAW,
- SPE_BUILTIN_EVMHOSSIANW,
- SPE_BUILTIN_EVMHOUMI,
- SPE_BUILTIN_EVMHOUMIA,
- SPE_BUILTIN_EVMHOUMIAAW,
- SPE_BUILTIN_EVMHOUMIANW,
- SPE_BUILTIN_EVMHOUSIAAW,
- SPE_BUILTIN_EVMHOUSIANW,
- SPE_BUILTIN_EVMWHSMF,
- SPE_BUILTIN_EVMWHSMFA,
- SPE_BUILTIN_EVMWHSMI,
- SPE_BUILTIN_EVMWHSMIA,
- SPE_BUILTIN_EVMWHSSF,
- SPE_BUILTIN_EVMWHSSFA,
- SPE_BUILTIN_EVMWHUMI,
- SPE_BUILTIN_EVMWHUMIA,
- SPE_BUILTIN_EVMWLSMIAAW,
- SPE_BUILTIN_EVMWLSMIANW,
- SPE_BUILTIN_EVMWLSSIAAW,
- SPE_BUILTIN_EVMWLSSIANW,
- SPE_BUILTIN_EVMWLUMI,
- SPE_BUILTIN_EVMWLUMIA,
- SPE_BUILTIN_EVMWLUMIAAW,
- SPE_BUILTIN_EVMWLUMIANW,
- SPE_BUILTIN_EVMWLUSIAAW,
- SPE_BUILTIN_EVMWLUSIANW,
- SPE_BUILTIN_EVMWSMF,
- SPE_BUILTIN_EVMWSMFA,
- SPE_BUILTIN_EVMWSMFAA,
- SPE_BUILTIN_EVMWSMFAN,
- SPE_BUILTIN_EVMWSMI,
- SPE_BUILTIN_EVMWSMIA,
- SPE_BUILTIN_EVMWSMIAA,
- SPE_BUILTIN_EVMWSMIAN,
- SPE_BUILTIN_EVMWHSSFAA,
- SPE_BUILTIN_EVMWSSF,
- SPE_BUILTIN_EVMWSSFA,
- SPE_BUILTIN_EVMWSSFAA,
- SPE_BUILTIN_EVMWSSFAN,
- SPE_BUILTIN_EVMWUMI,
- SPE_BUILTIN_EVMWUMIA,
- SPE_BUILTIN_EVMWUMIAA,
- SPE_BUILTIN_EVMWUMIAN,
- SPE_BUILTIN_EVNAND,
- SPE_BUILTIN_EVNOR,
- SPE_BUILTIN_EVOR,
- SPE_BUILTIN_EVORC,
- SPE_BUILTIN_EVRLW,
- SPE_BUILTIN_EVSLW,
- SPE_BUILTIN_EVSRWS,
- SPE_BUILTIN_EVSRWU,
- SPE_BUILTIN_EVSTDDX,
- SPE_BUILTIN_EVSTDHX,
- SPE_BUILTIN_EVSTDWX,
- SPE_BUILTIN_EVSTWHEX,
- SPE_BUILTIN_EVSTWHOX,
- SPE_BUILTIN_EVSTWWEX,
- SPE_BUILTIN_EVSTWWOX,
- SPE_BUILTIN_EVSUBFW,
- SPE_BUILTIN_EVXOR,
- SPE_BUILTIN_EVABS,
- SPE_BUILTIN_EVADDSMIAAW,
- SPE_BUILTIN_EVADDSSIAAW,
- SPE_BUILTIN_EVADDUMIAAW,
- SPE_BUILTIN_EVADDUSIAAW,
- SPE_BUILTIN_EVCNTLSW,
- SPE_BUILTIN_EVCNTLZW,
- SPE_BUILTIN_EVEXTSB,
- SPE_BUILTIN_EVEXTSH,
- SPE_BUILTIN_EVFSABS,
- SPE_BUILTIN_EVFSCFSF,
- SPE_BUILTIN_EVFSCFSI,
- SPE_BUILTIN_EVFSCFUF,
- SPE_BUILTIN_EVFSCFUI,
- SPE_BUILTIN_EVFSCTSF,
- SPE_BUILTIN_EVFSCTSI,
- SPE_BUILTIN_EVFSCTSIZ,
- SPE_BUILTIN_EVFSCTUF,
- SPE_BUILTIN_EVFSCTUI,
- SPE_BUILTIN_EVFSCTUIZ,
- SPE_BUILTIN_EVFSNABS,
- SPE_BUILTIN_EVFSNEG,
- SPE_BUILTIN_EVMRA,
- SPE_BUILTIN_EVNEG,
- SPE_BUILTIN_EVRNDW,
- SPE_BUILTIN_EVSUBFSMIAAW,
- SPE_BUILTIN_EVSUBFSSIAAW,
- SPE_BUILTIN_EVSUBFUMIAAW,
- SPE_BUILTIN_EVSUBFUSIAAW,
- SPE_BUILTIN_EVADDIW,
- SPE_BUILTIN_EVLDD,
- SPE_BUILTIN_EVLDH,
- SPE_BUILTIN_EVLDW,
- SPE_BUILTIN_EVLHHESPLAT,
- SPE_BUILTIN_EVLHHOSSPLAT,
- SPE_BUILTIN_EVLHHOUSPLAT,
- SPE_BUILTIN_EVLWHE,
- SPE_BUILTIN_EVLWHOS,
- SPE_BUILTIN_EVLWHOU,
- SPE_BUILTIN_EVLWHSPLAT,
- SPE_BUILTIN_EVLWWSPLAT,
- SPE_BUILTIN_EVRLWI,
- SPE_BUILTIN_EVSLWI,
- SPE_BUILTIN_EVSRWIS,
- SPE_BUILTIN_EVSRWIU,
- SPE_BUILTIN_EVSTDD,
- SPE_BUILTIN_EVSTDH,
- SPE_BUILTIN_EVSTDW,
- SPE_BUILTIN_EVSTWHE,
- SPE_BUILTIN_EVSTWHO,
- SPE_BUILTIN_EVSTWWE,
- SPE_BUILTIN_EVSTWWO,
- SPE_BUILTIN_EVSUBIFW,
-
- /* Compares. */
- SPE_BUILTIN_EVCMPEQ,
- SPE_BUILTIN_EVCMPGTS,
- SPE_BUILTIN_EVCMPGTU,
- SPE_BUILTIN_EVCMPLTS,
- SPE_BUILTIN_EVCMPLTU,
- SPE_BUILTIN_EVFSCMPEQ,
- SPE_BUILTIN_EVFSCMPGT,
- SPE_BUILTIN_EVFSCMPLT,
- SPE_BUILTIN_EVFSTSTEQ,
- SPE_BUILTIN_EVFSTSTGT,
- SPE_BUILTIN_EVFSTSTLT,
-
- /* EVSEL compares. */
- SPE_BUILTIN_EVSEL_CMPEQ,
- SPE_BUILTIN_EVSEL_CMPGTS,
- SPE_BUILTIN_EVSEL_CMPGTU,
- SPE_BUILTIN_EVSEL_CMPLTS,
- SPE_BUILTIN_EVSEL_CMPLTU,
- SPE_BUILTIN_EVSEL_FSCMPEQ,
- SPE_BUILTIN_EVSEL_FSCMPGT,
- SPE_BUILTIN_EVSEL_FSCMPLT,
- SPE_BUILTIN_EVSEL_FSTSTEQ,
- SPE_BUILTIN_EVSEL_FSTSTGT,
- SPE_BUILTIN_EVSEL_FSTSTLT,
-
- SPE_BUILTIN_EVSPLATFI,
- SPE_BUILTIN_EVSPLATI,
- SPE_BUILTIN_EVMWHSSMAA,
- SPE_BUILTIN_EVMWHSMFAA,
- SPE_BUILTIN_EVMWHSMIAA,
- SPE_BUILTIN_EVMWHUSIAA,
- SPE_BUILTIN_EVMWHUMIAA,
- SPE_BUILTIN_EVMWHSSFAN,
- SPE_BUILTIN_EVMWHSSIAN,
- SPE_BUILTIN_EVMWHSMFAN,
- SPE_BUILTIN_EVMWHSMIAN,
- SPE_BUILTIN_EVMWHUSIAN,
- SPE_BUILTIN_EVMWHUMIAN,
- SPE_BUILTIN_EVMWHGSSFAA,
- SPE_BUILTIN_EVMWHGSMFAA,
- SPE_BUILTIN_EVMWHGSMIAA,
- SPE_BUILTIN_EVMWHGUMIAA,
- SPE_BUILTIN_EVMWHGSSFAN,
- SPE_BUILTIN_EVMWHGSMFAN,
- SPE_BUILTIN_EVMWHGSMIAN,
- SPE_BUILTIN_EVMWHGUMIAN,
- SPE_BUILTIN_MTSPEFSCR,
- SPE_BUILTIN_MFSPEFSCR,
- SPE_BUILTIN_BRINC,
-
- /* PAIRED builtins. */
- PAIRED_BUILTIN_DIVV2SF3,
- PAIRED_BUILTIN_ABSV2SF2,
- PAIRED_BUILTIN_NEGV2SF2,
- PAIRED_BUILTIN_SQRTV2SF2,
- PAIRED_BUILTIN_ADDV2SF3,
- PAIRED_BUILTIN_SUBV2SF3,
- PAIRED_BUILTIN_RESV2SF2,
- PAIRED_BUILTIN_MULV2SF3,
- PAIRED_BUILTIN_MSUB,
- PAIRED_BUILTIN_MADD,
- PAIRED_BUILTIN_NMSUB,
- PAIRED_BUILTIN_NMADD,
- PAIRED_BUILTIN_NABSV2SF2,
- PAIRED_BUILTIN_SUM0,
- PAIRED_BUILTIN_SUM1,
- PAIRED_BUILTIN_MULS0,
- PAIRED_BUILTIN_MULS1,
- PAIRED_BUILTIN_MERGE00,
- PAIRED_BUILTIN_MERGE01,
- PAIRED_BUILTIN_MERGE10,
- PAIRED_BUILTIN_MERGE11,
- PAIRED_BUILTIN_MADDS0,
- PAIRED_BUILTIN_MADDS1,
- PAIRED_BUILTIN_STX,
- PAIRED_BUILTIN_LX,
- PAIRED_BUILTIN_SELV2SF4,
- PAIRED_BUILTIN_CMPU0,
- PAIRED_BUILTIN_CMPU1,
-
- RS6000_BUILTIN_RECIP,
- RS6000_BUILTIN_RECIPF,
- RS6000_BUILTIN_RSQRTF,
- RS6000_BUILTIN_BSWAP_HI,
-
- /* VSX builtins. */
- VSX_BUILTIN_LXSDX,
- VSX_BUILTIN_LXVD2X,
- VSX_BUILTIN_LXVDSX,
- VSX_BUILTIN_LXVW4X,
- VSX_BUILTIN_STXSDX,
- VSX_BUILTIN_STXVD2X,
- VSX_BUILTIN_STXVW4X,
- VSX_BUILTIN_XSABSDP,
- VSX_BUILTIN_XSADDDP,
- VSX_BUILTIN_XSCMPODP,
- VSX_BUILTIN_XSCMPUDP,
- VSX_BUILTIN_XSCPSGNDP,
- VSX_BUILTIN_XSCVDPSP,
- VSX_BUILTIN_XSCVDPSXDS,
- VSX_BUILTIN_XSCVDPSXWS,
- VSX_BUILTIN_XSCVDPUXDS,
- VSX_BUILTIN_XSCVDPUXWS,
- VSX_BUILTIN_XSCVSPDP,
- VSX_BUILTIN_XSCVSXDDP,
- VSX_BUILTIN_XSCVUXDDP,
- VSX_BUILTIN_XSDIVDP,
- VSX_BUILTIN_XSMADDADP,
- VSX_BUILTIN_XSMADDMDP,
- VSX_BUILTIN_XSMAXDP,
- VSX_BUILTIN_XSMINDP,
- VSX_BUILTIN_XSMOVDP,
- VSX_BUILTIN_XSMSUBADP,
- VSX_BUILTIN_XSMSUBMDP,
- VSX_BUILTIN_XSMULDP,
- VSX_BUILTIN_XSNABSDP,
- VSX_BUILTIN_XSNEGDP,
- VSX_BUILTIN_XSNMADDADP,
- VSX_BUILTIN_XSNMADDMDP,
- VSX_BUILTIN_XSNMSUBADP,
- VSX_BUILTIN_XSNMSUBMDP,
- VSX_BUILTIN_XSRDPI,
- VSX_BUILTIN_XSRDPIC,
- VSX_BUILTIN_XSRDPIM,
- VSX_BUILTIN_XSRDPIP,
- VSX_BUILTIN_XSRDPIZ,
- VSX_BUILTIN_XSREDP,
- VSX_BUILTIN_XSRSQRTEDP,
- VSX_BUILTIN_XSSQRTDP,
- VSX_BUILTIN_XSSUBDP,
- VSX_BUILTIN_CPSGNDP,
- VSX_BUILTIN_CPSGNSP,
- VSX_BUILTIN_XSTDIVDP_FE,
- VSX_BUILTIN_XSTDIVDP_FG,
- VSX_BUILTIN_XSTSQRTDP_FE,
- VSX_BUILTIN_XSTSQRTDP_FG,
- VSX_BUILTIN_XVABSDP,
- VSX_BUILTIN_XVABSSP,
- VSX_BUILTIN_XVADDDP,
- VSX_BUILTIN_XVADDSP,
- VSX_BUILTIN_XVCMPEQDP,
- VSX_BUILTIN_XVCMPEQSP,
- VSX_BUILTIN_XVCMPGEDP,
- VSX_BUILTIN_XVCMPGESP,
- VSX_BUILTIN_XVCMPGTDP,
- VSX_BUILTIN_XVCMPGTSP,
- VSX_BUILTIN_XVCMPEQDP_P,
- VSX_BUILTIN_XVCMPEQSP_P,
- VSX_BUILTIN_XVCMPGEDP_P,
- VSX_BUILTIN_XVCMPGESP_P,
- VSX_BUILTIN_XVCMPGTDP_P,
- VSX_BUILTIN_XVCMPGTSP_P,
- VSX_BUILTIN_XVCPSGNDP,
- VSX_BUILTIN_XVCPSGNSP,
- VSX_BUILTIN_XVCVDPSP,
- VSX_BUILTIN_XVCVDPSXDS,
- VSX_BUILTIN_XVCVDPSXWS,
- VSX_BUILTIN_XVCVDPUXDS,
- VSX_BUILTIN_XVCVDPUXDS_UNS,
- VSX_BUILTIN_XVCVDPUXWS,
- VSX_BUILTIN_XVCVSPDP,
- VSX_BUILTIN_XVCVSPSXDS,
- VSX_BUILTIN_XVCVSPSXWS,
- VSX_BUILTIN_XVCVSPUXDS,
- VSX_BUILTIN_XVCVSPUXWS,
- VSX_BUILTIN_XVCVSXDDP,
- VSX_BUILTIN_XVCVSXDSP,
- VSX_BUILTIN_XVCVSXWDP,
- VSX_BUILTIN_XVCVSXWSP,
- VSX_BUILTIN_XVCVUXDDP,
- VSX_BUILTIN_XVCVUXDDP_UNS,
- VSX_BUILTIN_XVCVUXDSP,
- VSX_BUILTIN_XVCVUXWDP,
- VSX_BUILTIN_XVCVUXWSP,
- VSX_BUILTIN_XVDIVDP,
- VSX_BUILTIN_XVDIVSP,
- VSX_BUILTIN_XVMADDDP,
- VSX_BUILTIN_XVMADDSP,
- VSX_BUILTIN_XVMAXDP,
- VSX_BUILTIN_XVMAXSP,
- VSX_BUILTIN_XVMINDP,
- VSX_BUILTIN_XVMINSP,
- VSX_BUILTIN_XVMSUBDP,
- VSX_BUILTIN_XVMSUBSP,
- VSX_BUILTIN_XVMULDP,
- VSX_BUILTIN_XVMULSP,
- VSX_BUILTIN_XVNABSDP,
- VSX_BUILTIN_XVNABSSP,
- VSX_BUILTIN_XVNEGDP,
- VSX_BUILTIN_XVNEGSP,
- VSX_BUILTIN_XVNMADDDP,
- VSX_BUILTIN_XVNMADDSP,
- VSX_BUILTIN_XVNMSUBDP,
- VSX_BUILTIN_XVNMSUBSP,
- VSX_BUILTIN_XVRDPI,
- VSX_BUILTIN_XVRDPIC,
- VSX_BUILTIN_XVRDPIM,
- VSX_BUILTIN_XVRDPIP,
- VSX_BUILTIN_XVRDPIZ,
- VSX_BUILTIN_XVREDP,
- VSX_BUILTIN_XVRESP,
- VSX_BUILTIN_XVRSPI,
- VSX_BUILTIN_XVRSPIC,
- VSX_BUILTIN_XVRSPIM,
- VSX_BUILTIN_XVRSPIP,
- VSX_BUILTIN_XVRSPIZ,
- VSX_BUILTIN_XVRSQRTEDP,
- VSX_BUILTIN_XVRSQRTESP,
- VSX_BUILTIN_XVSQRTDP,
- VSX_BUILTIN_XVSQRTSP,
- VSX_BUILTIN_XVSUBDP,
- VSX_BUILTIN_XVSUBSP,
- VSX_BUILTIN_XVTDIVDP_FE,
- VSX_BUILTIN_XVTDIVDP_FG,
- VSX_BUILTIN_XVTDIVSP_FE,
- VSX_BUILTIN_XVTDIVSP_FG,
- VSX_BUILTIN_XVTSQRTDP_FE,
- VSX_BUILTIN_XVTSQRTDP_FG,
- VSX_BUILTIN_XVTSQRTSP_FE,
- VSX_BUILTIN_XVTSQRTSP_FG,
- VSX_BUILTIN_XXSEL_2DI,
- VSX_BUILTIN_XXSEL_2DF,
- VSX_BUILTIN_XXSEL_4SI,
- VSX_BUILTIN_XXSEL_4SF,
- VSX_BUILTIN_XXSEL_8HI,
- VSX_BUILTIN_XXSEL_16QI,
- VSX_BUILTIN_XXSEL_2DI_UNS,
- VSX_BUILTIN_XXSEL_4SI_UNS,
- VSX_BUILTIN_XXSEL_8HI_UNS,
- VSX_BUILTIN_XXSEL_16QI_UNS,
- VSX_BUILTIN_VPERM_2DI,
- VSX_BUILTIN_VPERM_2DF,
- VSX_BUILTIN_VPERM_4SI,
- VSX_BUILTIN_VPERM_4SF,
- VSX_BUILTIN_VPERM_8HI,
- VSX_BUILTIN_VPERM_16QI,
- VSX_BUILTIN_VPERM_2DI_UNS,
- VSX_BUILTIN_VPERM_4SI_UNS,
- VSX_BUILTIN_VPERM_8HI_UNS,
- VSX_BUILTIN_VPERM_16QI_UNS,
- VSX_BUILTIN_XXPERMDI_2DF,
- VSX_BUILTIN_XXPERMDI_2DI,
- VSX_BUILTIN_XXPERMDI_4SF,
- VSX_BUILTIN_XXPERMDI_4SI,
- VSX_BUILTIN_XXPERMDI_8HI,
- VSX_BUILTIN_XXPERMDI_16QI,
- VSX_BUILTIN_CONCAT_2DF,
- VSX_BUILTIN_CONCAT_2DI,
- VSX_BUILTIN_SET_2DF,
- VSX_BUILTIN_SET_2DI,
- VSX_BUILTIN_SPLAT_2DF,
- VSX_BUILTIN_SPLAT_2DI,
- VSX_BUILTIN_XXMRGHW_4SF,
- VSX_BUILTIN_XXMRGHW_4SI,
- VSX_BUILTIN_XXMRGLW_4SF,
- VSX_BUILTIN_XXMRGLW_4SI,
- VSX_BUILTIN_XXSLDWI_16QI,
- VSX_BUILTIN_XXSLDWI_8HI,
- VSX_BUILTIN_XXSLDWI_4SI,
- VSX_BUILTIN_XXSLDWI_4SF,
- VSX_BUILTIN_XXSLDWI_2DI,
- VSX_BUILTIN_XXSLDWI_2DF,
- VSX_BUILTIN_VEC_INIT_V2DF,
- VSX_BUILTIN_VEC_INIT_V2DI,
- VSX_BUILTIN_VEC_SET_V2DF,
- VSX_BUILTIN_VEC_SET_V2DI,
- VSX_BUILTIN_VEC_EXT_V2DF,
- VSX_BUILTIN_VEC_EXT_V2DI,
-
- /* VSX overloaded builtins, add the overloaded functions not present in
- Altivec. */
- VSX_BUILTIN_VEC_MUL,
- VSX_BUILTIN_OVERLOADED_FIRST = VSX_BUILTIN_VEC_MUL,
- VSX_BUILTIN_VEC_MSUB,
- VSX_BUILTIN_VEC_NMADD,
- VSX_BUITLIN_VEC_NMSUB,
- VSX_BUILTIN_VEC_DIV,
- VSX_BUILTIN_VEC_XXMRGHW,
- VSX_BUILTIN_VEC_XXMRGLW,
- VSX_BUILTIN_VEC_XXPERMDI,
- VSX_BUILTIN_VEC_XXSLDWI,
- VSX_BUILTIN_VEC_XXSPLTD,
- VSX_BUILTIN_VEC_XXSPLTW,
- VSX_BUILTIN_OVERLOADED_LAST = VSX_BUILTIN_VEC_XXSPLTW,
-
- /* Combined VSX/Altivec builtins. */
- VECTOR_BUILTIN_FLOAT_V4SI_V4SF,
- VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF,
- VECTOR_BUILTIN_FIX_V4SF_V4SI,
- VECTOR_BUILTIN_FIXUNS_V4SF_V4SI,
-
- /* Power7 builtins, that aren't VSX instructions. */
- POWER7_BUILTIN_BPERMD,
+#include "rs6000-builtin.def"
RS6000_BUILTIN_COUNT
};
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+
enum rs6000_builtin_type_index
{
RS6000_BTI_NOT_OPAQUE,
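
The enumeration deleted above is now generated from rs6000-builtin.def
via the X-macro idiom: RS6000_BUILTIN (and RS6000_BUILTIN_EQUATE) are
defined so that each entry of the .def file expands to one enumerator,
the file is #included, and the macros are undefined again, as the
+#undef lines show (the corresponding #define lines are not visible in
this hunk).  A minimal sketch of the idiom, with made-up entry names
and assumed macro parameters:

    /* Sketch only; the real parameter lists of RS6000_BUILTIN and
       RS6000_BUILTIN_EQUATE are an assumption, not copied from
       rs6000-builtin.def.  */
    #define RS6000_BUILTIN(ENUM, CLASS) ENUM,
    #define RS6000_BUILTIN_EQUATE(ENUM, VALUE) ENUM = VALUE,

    enum demo_builtins
    {
      RS6000_BUILTIN (DEMO_BUILTIN_FOO, 0)                  /* DEMO_BUILTIN_FOO,               */
      RS6000_BUILTIN_EQUATE (DEMO_ALIAS, DEMO_BUILTIN_FOO)  /* DEMO_ALIAS = DEMO_BUILTIN_FOO,  */
      DEMO_BUILTIN_COUNT
    };

    #undef RS6000_BUILTIN
    #undef RS6000_BUILTIN_EQUATE
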
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ba51f1cebc7..7ff1b3c1a47 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -117,7 +117,7 @@
;; Define an insn type attribute. This is used in function unit delay
;; computations.
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr"
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
(const_string "integer"))
;; Define floating point instruction sub-types for use with Xfpu.md
@@ -139,7 +139,7 @@
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in rs6000.h.
-(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell"
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,power4,power5,power6,power7,cell,ppca2"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -158,6 +158,7 @@
(include "mpc.md")
(include "40x.md")
(include "440.md")
+(include "476.md")
(include "603.md")
(include "6xx.md")
(include "7xx.md")
@@ -171,6 +172,7 @@
(include "power7.md")
(include "cell.md")
(include "xfpu.md")
+(include "a2.md")
(include "predicates.md")
(include "constraints.md")
@@ -974,7 +976,7 @@
[(set_attr "type" "compare")
(set_attr "length" "4,8")])
-;; IBM 405, 440 and 464 half-word multiplication operations.
+;; IBM 405, 440, 464 and 476 half-word multiplication operations.
(define_insn "*macchwc"
[(set (match_operand:CC 3 "cc_reg_operand" "=x")
@@ -1438,7 +1440,7 @@
"mullhwu %0, %1, %2"
[(set_attr "type" "imul3")])
-;; IBM 405, 440 and 464 string-search dlmzb instruction support.
+;; IBM 405, 440, 464 and 476 string-search dlmzb instruction support.
(define_insn "dlmzb"
[(set (match_operand:CC 3 "cc_reg_operand" "=x")
(unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r")
@@ -6040,7 +6042,8 @@
"TARGET_ISEL<sel>"
"*
{ return output_isel (operands); }"
- [(set_attr "length" "4")])
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
(define_insn "isel_unsigned_<mode>"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
@@ -6053,7 +6056,8 @@
"TARGET_ISEL<sel>"
"*
{ return output_isel (operands); }"
- [(set_attr "length" "4")])
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
(define_expand "movsfcc"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 90af9dce47b..63f0f8c1582 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -155,8 +155,12 @@ mvectorize-builtins
Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1)
; Explicitly control whether we vectorize the builtins or not.
+mno-update
+Target Report RejectNegative Mask(NO_UPDATE)
+Do not generate load/store with update instructions
+
mupdate
-Target Report Var(TARGET_UPDATE) Init(1)
+Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE)
Generate load/store with update instructions
mavoid-indexed-addresses
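
This rework ties -mupdate and -mno-update to one target_flags bit:
-mno-update sets MASK_NO_UPDATE, and the InverseMask(NO_UPDATE, UPDATE)
entry derives TARGET_UPDATE from the complement of that same bit, so the
positive and negative forms can no longer fall out of sync.  The
generated flag macros behave roughly like the sketch below (the names
and the bit position are assumptions; the real definitions come from the
options machinery, not from this hunk):

    #define MASK_NO_UPDATE   (1 << 0)   /* some free target_flags bit */
    #define TARGET_NO_UPDATE (target_flags & MASK_NO_UPDATE)
    #define TARGET_UPDATE    ((target_flags & MASK_NO_UPDATE) == 0)
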
diff --git a/gcc/config/rs6000/rs64.md b/gcc/config/rs6000/rs64.md
index f7234408ade..e221b52a370 100644
--- a/gcc/config/rs6000/rs64.md
+++ b/gcc/config/rs6000/rs64.md
@@ -1,5 +1,5 @@
;; Scheduling description for IBM RS64 processors.
-;; Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -47,7 +47,7 @@
(define_insn_reservation "rs64a-integer" 1
(and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
- var_shift_rotate,cntlz,exts")
+ var_shift_rotate,cntlz,exts,isel")
(eq_attr "cpu" "rs64a"))
"iu_rs64")
diff --git a/gcc/config/rs6000/t-fprules b/gcc/config/rs6000/t-fprules
index 272e00c1ada..42d8fd77b5b 100644
--- a/gcc/config/rs6000/t-fprules
+++ b/gcc/config/rs6000/t-fprules
@@ -21,6 +21,7 @@ MULTILIB_MATCHES_FLOAT = msoft-float=mcpu?401 \
msoft-float=mcpu?405 \
msoft-float=mcpu?440 \
msoft-float=mcpu?464 \
+ msoft-float=mcpu?476 \
msoft-float=mcpu?ec603e \
msoft-float=mcpu?801 \
msoft-float=mcpu?821 \
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 66a367a7b62..773d710fa3f 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -19,6 +19,8 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
+TM_H += $(srcdir)/config/rs6000/rs6000-builtin.def
+
rs6000.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RTL_H) $(REGS_H) hard-reg-set.h \
real.h insn-config.h conditions.h insn-attr.h flags.h $(RECOG_H) \
@@ -56,6 +58,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \
$(srcdir)/config/rs6000/power7.md \
$(srcdir)/config/rs6000/cell.md \
$(srcdir)/config/rs6000/xfpu.md \
+ $(srcdir)/config/rs6000/a2.md \
$(srcdir)/config/rs6000/predicates.md \
$(srcdir)/config/rs6000/constraints.md \
$(srcdir)/config/rs6000/darwin.md \
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
index cfd11eb6d79..c302ad2015d 100644
--- a/gcc/config/rs6000/vxworks.h
+++ b/gcc/config/rs6000/vxworks.h
@@ -1,5 +1,5 @@
/* Definitions of target machine for GNU compiler. Vxworks PowerPC version.
- Copyright (C) 1996, 2000, 2002, 2003, 2004, 2005, 2007
+ Copyright (C) 1996, 2000, 2002, 2003, 2004, 2005, 2007, 2009
Free Software Foundation, Inc.
Contributed by CodeSourcery, LLC.
@@ -68,6 +68,8 @@ along with GCC; see the file COPYING3. If not see
%{mcpu=403 : -DCPU=PPC403 ; \
mcpu=405 : -DCPU=PPC405 ; \
mcpu=440 : -DCPU=PPC440 ; \
+ mcpu=464 : -DCPU=PPC464 ; \
+ mcpu=476 : -DCPU=PPC476 ; \
mcpu=603 : -DCPU=PPC603 ; \
mcpu=604 : -DCPU=PPC604 ; \
mcpu=860 : -DCPU=PPC860 ; \
diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md
new file mode 100644
index 00000000000..52bf7df3621
--- /dev/null
+++ b/gcc/config/rx/constraints.md
@@ -0,0 +1,81 @@
+;; Constraint definitions for Renesas RX.
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_constraint "Symbol"
+ "@internal Constraint on the type of rtx allowed in call insns"
+ (match_test "GET_CODE (op) == SYMBOL_REF")
+)
+
+
+(define_constraint "Int08"
+ "@internal A signed or unsigned 8-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 8), (1 << 8) - 1)")
+ )
+)
+
+(define_constraint "Sint08"
+ "@internal A signed 8-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 7), (1 << 7) - 1)")
+ )
+)
+
+(define_constraint "Sint16"
+ "@internal A signed 16-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 15), (1 << 15) - 1)")
+ )
+)
+
+(define_constraint "Sint24"
+ "@internal A signed 24-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 23), (1 << 23) - 1)")
+ )
+)
+
+;; This constraint is used by the SUBSI3 pattern because the
+;; RX SUB instruction can only take a 4-bit unsigned integer
+;; value. Also used by the MVTIPL instruction.
+(define_constraint "Uint04"
+ "@internal An unsigned 4-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 15)")
+ )
+)
+
+;; This is used in arithmetic and logic instructions for
+;; a source operand that lies in memory and which satisfies
+;; rx_is_restricted_memory_address().
+
+(define_memory_constraint "Q"
+ "A MEM which only uses REG or REG+INT addressing."
+ (and (match_code "mem")
+ (ior (match_code "reg" "0")
+ (and (match_code "plus" "0")
+ (and (match_code "reg,subreg" "00")
+ (match_code "const_int" "01")
+ )
+ )
+ )
+ )
+)
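
The shift expressions above are just signed and unsigned bounds spelled
out: Sint08 accepts [-128, 127], Sint16 accepts [-32768, 32767], and
Int08 accepts [-256, 255] so that both the signed and the unsigned
interpretation of an 8-bit field fit.  A throwaway check of the
arithmetic (plain C, relying on the same left-shift-of-a-negative-value
behaviour the constraints themselves assume):

    #include <stdio.h>

    int
    main (void)
    {
      printf ("Sint08: [%d, %d]\n", -1 << 7,  (1 << 7)  - 1);  /* [-128, 127]     */
      printf ("Int08:  [%d, %d]\n", -1 << 8,  (1 << 8)  - 1);  /* [-256, 255]     */
      printf ("Sint16: [%d, %d]\n", -1 << 15, (1 << 15) - 1);  /* [-32768, 32767] */
      return 0;
    }
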
diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md
new file mode 100644
index 00000000000..d7a363ebb88
--- /dev/null
+++ b/gcc/config/rx/predicates.md
@@ -0,0 +1,288 @@
+;; Predicate definitions for Renesas RX.
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+
+;; Check that the operand is suitable for a call insn.
+;; Only registers and symbol refs are allowed.
+
+(define_predicate "rx_call_operand"
+ (match_code "symbol_ref,reg")
+)
+
+;; For sibcall operations we can only use a symbolic address.
+
+(define_predicate "rx_symbolic_call_operand"
+ (match_code "symbol_ref")
+)
+
+;; Check that the operand is suitable for a shift insn.
+;; Only small integers or a value in a register are permitted.
+
+(define_predicate "rx_shift_operand"
+ (match_code "const_int,reg")
+ {
+ if (CONST_INT_P (op))
+ return IN_RANGE (INTVAL (op), 0, 31);
+ return true;
+ }
+)
+
+;; Check that the operand is suitable as the source operand
+;; for a logic or arithmetic instruction.  Registers, integers
+;; and a restricted subset of memory addresses are allowed.
+
+(define_predicate "rx_source_operand"
+ (match_code "const_int,reg,mem")
+ {
+ if (CONST_INT_P (op))
+ return rx_is_legitimate_constant (op);
+
+ if (! MEM_P (op))
+ return true;
+
+ /* Do not allow size conversions whilst accessing memory. */
+ if (GET_MODE (op) != mode)
+ return false;
+
+ return rx_is_restricted_memory_address (XEXP (op, 0), mode);
+ }
+)
+
+;; Check that the operand is suitable as the source operand
+;; for a comparison instruction. This is the same as
+;; rx_source_operand except that SUBREGs are allowed but
+;; CONST_INTs are not.
+
+(define_predicate "rx_compare_operand"
+ (match_code "subreg,reg,mem")
+ {
+ if (GET_CODE (op) == SUBREG)
+ return REG_P (XEXP (op, 0));
+
+ if (! MEM_P (op))
+ return true;
+
+ return rx_is_restricted_memory_address (XEXP (op, 0), mode);
+ }
+)
+
+;; Return true if OP is a store multiple operation. This looks like:
+;;
+;; [(set (SP) (MINUS (SP) (INT)))
+;; (set (MEM (SP)) (REG))
+;; (set (MEM (MINUS (SP) (INT))) (REG)) {optionally repeated}
+;; ]
+
+(define_special_predicate "rx_store_multiple_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int src_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != MINUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+ /* Check that the next element is the first push. */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || ! MEM_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
+ || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
+ != GET_MODE_SIZE (SImode))
+ return false;
+
+ src_regno = REGNO (SET_SRC (element));
+
+ /* Check that the remaining elements use SP-<disp>
+ addressing and decreasing register numbers. */
+ for (i = 2; i < count; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || REGNO (SET_SRC (element)) != src_regno - (i - 1)
+ || ! MEM_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
+ || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
+ != i * GET_MODE_SIZE (SImode))
+ return false;
+ }
+ return true;
+})
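+
+;; As an illustration (not part of the original patch): pushing
+;; r9, r8 and r7 would be matched as
+;;   [(set (SP) (minus (SP) (const_int 12)))
+;;    (set (mem:SI (minus (SP) (const_int 4)))  (reg:SI 9))
+;;    (set (mem:SI (minus (SP) (const_int 8)))  (reg:SI 8))
+;;    (set (mem:SI (minus (SP) (const_int 12))) (reg:SI 7))]
+;; which rx_emit_stack_pushm (declared in rx-protos.h) can then emit
+;; as a single store-multiple instruction.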
+
+;; Return true if OP is a load multiple operation.
+;; This looks like:
+;; [(set (SP) (PLUS (SP) (INT)))
+;; (set (REG) (MEM (SP)))
+;; (set (REG) (MEM (PLUS (SP) (INT)))) {optionally repeated}
+;; ]
+
+(define_special_predicate "rx_load_multiple_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != PLUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+  /* Check that the next element is the first pop.  */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || ! MEM_P (SET_SRC (element))
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (element));
+
+ /* Check that the remaining elements use SP+<disp>
+     addressing and increasing register numbers.  */
+ for (i = 2; i < count; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || REGNO (SET_DEST (element)) != dest_regno + (i - 1)
+ || ! MEM_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || GET_CODE (XEXP (SET_SRC (element), 0)) != PLUS
+ || ! REG_P (XEXP (XEXP (SET_SRC (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_SRC (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_SRC (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (element), 0), 1))
+ != (i - 1) * GET_MODE_SIZE (SImode))
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a pop-and-return load multiple operation.
+;; This looks like:
+;; [(set (SP) (PLUS (SP) (INT)))
+;; (set (REG) (MEM (SP)))
+;; (set (REG) (MEM (PLUS (SP) (INT)))) {optional and possibly repeated}
+;; (return)
+;; ]
+
+(define_special_predicate "rx_rtsd_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != PLUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+  /* Check that the next element is the first pop.  */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || ! MEM_P (SET_SRC (element))
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (element));
+
+  /* Check that the remaining elements, if any, except for
+     the final RETURN, use SP+<disp> addressing and
+     increasing register numbers.  */
+ for (i = 2; i < count - 1; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || REGNO (SET_DEST (element)) != dest_regno + (i - 1)
+ || ! MEM_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || GET_CODE (XEXP (SET_SRC (element), 0)) != PLUS
+ || ! REG_P (XEXP (XEXP (SET_SRC (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_SRC (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_SRC (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (element), 0), 1))
+ != (i - 1) * GET_MODE_SIZE (SImode))
+ return false;
+ }
+
+ /* The last element must be a RETURN. */
+ element = XVECEXP (op, 0, count - 1);
+ return GET_CODE (element) == RETURN;
+})
diff --git a/gcc/config/rx/rx-protos.h b/gcc/config/rx/rx-protos.h
new file mode 100644
index 00000000000..5c37fe0a83c
--- /dev/null
+++ b/gcc/config/rx/rx-protos.h
@@ -0,0 +1,52 @@
+/* Exported function prototypes from the Renesas RX backend.
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_RX_PROTOS_H
+#define GCC_RX_PROTOS_H
+
+/* A few abbreviations to make the prototypes shorter. */
+#define Mmode enum machine_mode
+#define Fargs CUMULATIVE_ARGS
+
+extern void rx_conditional_register_usage (void);
+extern void rx_expand_prologue (void);
+extern int rx_initial_elimination_offset (int, int);
+
+#ifdef RTX_CODE
+extern void rx_emit_stack_popm (rtx *, bool);
+extern void rx_emit_stack_pushm (rtx *);
+extern void rx_expand_epilogue (bool);
+extern bool rx_expand_insv (rtx *);
+extern const char * rx_gen_cond_branch_template (rtx, bool);
+extern char * rx_gen_move_template (rtx *, bool);
+extern bool rx_is_legitimate_constant (rtx);
+extern bool rx_is_mode_dependent_addr (rtx);
+extern bool rx_is_restricted_memory_address (rtx, Mmode);
+extern void rx_notice_update_cc (rtx body, rtx insn);
+extern void rx_print_operand (FILE *, rtx, int);
+extern void rx_print_operand_address (FILE *, rtx);
+#endif
+
+#ifdef TREE_CODE
+extern unsigned int rx_function_arg_size (Mmode, const_tree);
+extern struct rtx_def * rx_function_arg (Fargs *, Mmode, const_tree, bool);
+#endif
+
+#endif /* GCC_RX_PROTOS_H */
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
new file mode 100644
index 00000000000..885f52581de
--- /dev/null
+++ b/gcc/config/rx/rx.c
@@ -0,0 +1,2517 @@
+/* Subroutines used for code generation on Renesas RX processors.
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* To Do:
+
+ * Re-enable memory-to-memory copies and fix up reload. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "recog.h"
+#include "toplev.h"
+#include "reload.h"
+#include "df.h"
+#include "ggc.h"
+#include "tm_p.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+
+enum rx_cpu_types rx_cpu_type = RX600;
+
+/* Return true if OP is a reference to an object in a small data area. */
+
+static bool
+rx_small_data_operand (rtx op)
+{
+ if (rx_small_data_limit == 0)
+ return false;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return SYMBOL_REF_SMALL_P (op);
+
+ return false;
+}
+
+static bool
+rx_is_legitimate_address (Mmode mode, rtx x, bool strict ATTRIBUTE_UNUSED)
+{
+ if (RTX_OK_FOR_BASE (x, strict))
+ /* Register Indirect. */
+ return true;
+
+ if (GET_MODE_SIZE (mode) == 4
+ && (GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC))
+ /* Pre-decrement Register Indirect or
+ Post-increment Register Indirect. */
+ return RTX_OK_FOR_BASE (XEXP (x, 0), strict);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx arg1 = XEXP (x, 0);
+ rtx arg2 = XEXP (x, 1);
+ rtx index = NULL_RTX;
+
+ if (REG_P (arg1) && RTX_OK_FOR_BASE (arg1, strict))
+ index = arg2;
+ else if (REG_P (arg2) && RTX_OK_FOR_BASE (arg2, strict))
+ index = arg1;
+ else
+ return false;
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ {
+ /* Register Relative: REG + INT.
+		 Only positive displacements that are a multiple
+		 of the access size, up to 65535 times that size,
+		 are allowed.  */
+ HOST_WIDE_INT val = INTVAL (index);
+ int factor;
+
+ if (val < 0)
+ return false;
+
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ case 4: factor = 4; break;
+ case 2: factor = 2; break;
+ case 1: factor = 1; break;
+ }
+
+ if (val > (65535 * factor))
+ return false;
+ return (val % factor) == 0;
+ }
+
+ case REG:
+ /* Unscaled Indexed Register Indirect: REG + REG
+ Size has to be "QI", REG has to be valid. */
+ return GET_MODE_SIZE (mode) == 1 && RTX_OK_FOR_BASE (index, strict);
+
+ case MULT:
+ {
+ /* Scaled Indexed Register Indirect: REG + (REG * FACTOR)
+ Factor has to equal the mode size, REG has to be valid. */
+ rtx factor;
+
+ factor = XEXP (index, 1);
+ index = XEXP (index, 0);
+
+ return REG_P (index)
+ && RTX_OK_FOR_BASE (index, strict)
+ && CONST_INT_P (factor)
+ && GET_MODE_SIZE (mode) == INTVAL (factor);
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ /* Small data area accesses turn into register relative offsets. */
+ return rx_small_data_operand (x);
+}
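+
+/* Illustrative summary of the cases above (not part of the original
+   code): [reg] always; [reg+] and [-reg] only for 4-byte accesses;
+   reg + a positive displacement that is a multiple of the access
+   size, up to 65535 times that size; reg + reg only for QImode;
+   and reg + reg * size for the scaled form.  */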
+
+/* Returns TRUE for simple memory addresses, i.e. ones
+   that do not involve register indexed addressing
+   or pre/post increment/decrement.  */
+
+bool
+rx_is_restricted_memory_address (rtx mem, enum machine_mode mode)
+{
+ rtx base, index;
+
+ if (! rx_is_legitimate_address
+ (mode, mem, reload_in_progress || reload_completed))
+ return false;
+
+ switch (GET_CODE (mem))
+ {
+ case REG:
+ /* Simple memory addresses are OK. */
+ return true;
+
+ case PRE_DEC:
+ case POST_INC:
+ return false;
+
+ case PLUS:
+ /* Only allow REG+INT addressing. */
+ base = XEXP (mem, 0);
+ index = XEXP (mem, 1);
+
+ return RX_REG_P (base) && CONST_INT_P (index);
+
+ case SYMBOL_REF:
+ /* Can happen when small data is being supported.
+ Assume that it will be resolved into GP+INT. */
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+bool
+rx_is_mode_dependent_addr (rtx addr)
+{
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ /* --REG and REG++ only work in SImode. */
+ case PRE_DEC:
+ case POST_INC:
+ return true;
+
+ case MINUS:
+ case PLUS:
+ if (! REG_P (XEXP (addr, 0)))
+ return true;
+
+ addr = XEXP (addr, 1);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* REG+REG only works in SImode. */
+ return true;
+
+ case CONST_INT:
+	  /* REG+INT is only mode independent if INT is a
+	     positive multiple of 4 that fits into 8 bits.  */
+ if (((INTVAL (addr) & 3) == 0)
+ && IN_RANGE (INTVAL (addr), 4, 252))
+ return false;
+ return true;
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return true;
+
+ case MULT:
+ gcc_assert (REG_P (XEXP (addr, 0)));
+ gcc_assert (CONST_INT_P (XEXP (addr, 1)));
+ /* REG+REG*SCALE is always mode dependent. */
+ return true;
+
+ default:
+ /* Not recognized, so treat as mode dependent. */
+ return true;
+ }
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case REG:
+ /* These are all mode independent. */
+ return false;
+
+ default:
+ /* Everything else is unrecognized,
+ so treat as mode dependent. */
+ return true;
+ }
+}
+
+/* Output to stdio stream FILE the assembler syntax for an
+   instruction operand that is a memory reference whose
+   address is ADDR.  */
+
+void
+rx_print_operand_address (FILE * file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "[");
+ rx_print_operand (file, addr, 0);
+ fprintf (file, "]");
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "[-");
+ rx_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "]");
+ break;
+
+ case POST_INC:
+ fprintf (file, "[");
+ rx_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "+]");
+ break;
+
+ case PLUS:
+ {
+ rtx arg1 = XEXP (addr, 0);
+ rtx arg2 = XEXP (addr, 1);
+ rtx base, index;
+
+ if (REG_P (arg1) && RTX_OK_FOR_BASE (arg1, true))
+ base = arg1, index = arg2;
+ else if (REG_P (arg2) && RTX_OK_FOR_BASE (arg2, true))
+ base = arg2, index = arg1;
+ else
+ {
+ rx_print_operand (file, arg1, 0);
+ fprintf (file, " + ");
+ rx_print_operand (file, arg2, 0);
+ break;
+ }
+
+ if (REG_P (index) || GET_CODE (index) == MULT)
+ {
+ fprintf (file, "[");
+ rx_print_operand (file, index, 'A');
+ fprintf (file, ",");
+ }
+ else /* GET_CODE (index) == CONST_INT */
+ {
+ rx_print_operand (file, index, 'A');
+ fprintf (file, "[");
+ }
+ rx_print_operand (file, base, 0);
+ fprintf (file, "]");
+ break;
+ }
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST:
+ fprintf (file, "#");
+ default:
+ output_addr_const (file, addr);
+ break;
+ }
+}
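+
+/* For example (illustrative): (reg r1) prints as "[r1]",
+   (pre_dec (reg r1)) as "[-r1]", (post_inc (reg r1)) as "[r1+]"
+   and (plus (reg r1) (const_int 4)) as "4[r1]".  */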
+
+static void
+rx_print_integer (FILE * file, HOST_WIDE_INT val)
+{
+ if (IN_RANGE (val, -64, 64))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
+ else
+ fprintf (file,
+ TARGET_AS100_SYNTAX
+ ? "0%" HOST_WIDE_INT_PRINT "xH" : HOST_WIDE_INT_PRINT_HEX,
+ val);
+}
+
+static bool
+rx_assemble_integer (rtx x, unsigned int size, int is_aligned)
+{
+ const char * op = integer_asm_op (size, is_aligned);
+
+ if (! CONST_INT_P (x))
+ return default_assemble_integer (x, size, is_aligned);
+
+ if (op == NULL)
+ return false;
+ fputs (op, asm_out_file);
+
+ rx_print_integer (asm_out_file, INTVAL (x));
+ fputc ('\n', asm_out_file);
+ return true;
+}
+
+
+int rx_float_compare_mode;
+
+/* Handles the insertion of a single operand into the assembler output.
+ The %<letter> directives supported are:
+
+ %A Print an operand without a leading # character.
+ %B Print an integer comparison name.
+ %C Print a control register name.
+ %F Print a condition code flag name.
+ %H Print high part of a DImode register, integer or address.
+ %L Print low part of a DImode register, integer or address.
+ %Q If the operand is a MEM, then correctly generate
+ register indirect or register relative addressing. */
+
+void
+rx_print_operand (FILE * file, rtx op, int letter)
+{
+ switch (letter)
+ {
+ case 'A':
+ /* Print an operand without a leading #. */
+ if (MEM_P (op))
+ op = XEXP (op, 0);
+
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ output_addr_const (file, op);
+ break;
+ case CONST_INT:
+ fprintf (file, "%ld", (long) INTVAL (op));
+ break;
+ default:
+ rx_print_operand (file, op, 0);
+ break;
+ }
+ break;
+
+ case 'B':
+ switch (GET_CODE (op))
+ {
+ case LT: fprintf (file, "lt"); break;
+ case GE: fprintf (file, "ge"); break;
+ case GT: fprintf (file, "gt"); break;
+ case LE: fprintf (file, "le"); break;
+ case GEU: fprintf (file, "geu"); break;
+ case LTU: fprintf (file, "ltu"); break;
+ case GTU: fprintf (file, "gtu"); break;
+ case LEU: fprintf (file, "leu"); break;
+ case EQ: fprintf (file, "eq"); break;
+ case NE: fprintf (file, "ne"); break;
+ default: debug_rtx (op); gcc_unreachable ();
+ }
+ break;
+
+ case 'C':
+ gcc_assert (CONST_INT_P (op));
+ switch (INTVAL (op))
+ {
+ case 0: fprintf (file, "psw"); break;
+ case 2: fprintf (file, "usp"); break;
+ case 3: fprintf (file, "fpsw"); break;
+ case 4: fprintf (file, "cpen"); break;
+ case 8: fprintf (file, "bpsw"); break;
+ case 9: fprintf (file, "bpc"); break;
+ case 0xa: fprintf (file, "isp"); break;
+ case 0xb: fprintf (file, "fintv"); break;
+ case 0xc: fprintf (file, "intb"); break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'F':
+ gcc_assert (CONST_INT_P (op));
+ switch (INTVAL (op))
+ {
+ case 0: case 'c': case 'C': fprintf (file, "C"); break;
+ case 1: case 'z': case 'Z': fprintf (file, "Z"); break;
+ case 2: case 's': case 'S': fprintf (file, "S"); break;
+ case 3: case 'o': case 'O': fprintf (file, "O"); break;
+ case 8: case 'i': case 'I': fprintf (file, "I"); break;
+ case 9: case 'u': case 'U': fprintf (file, "U"); break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'H':
+ if (REG_P (op))
+ fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
+ else if (CONST_INT_P (op))
+ {
+ HOST_WIDE_INT v = INTVAL (op);
+
+ fprintf (file, "#");
+ /* Trickery to avoid problems with shifting 32 bits at a time. */
+ v = v >> 16;
+ v = v >> 16;
+ rx_print_integer (file, v);
+ }
+ else
+ {
+ gcc_assert (MEM_P (op));
+
+ if (! WORDS_BIG_ENDIAN)
+ op = adjust_address (op, SImode, 4);
+ output_address (XEXP (op, 0));
+ }
+ break;
+
+ case 'L':
+ if (REG_P (op))
+ fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
+ else if (CONST_INT_P (op))
+ {
+ fprintf (file, "#");
+ rx_print_integer (file, INTVAL (op) & 0xffffffff);
+ }
+ else
+ {
+ gcc_assert (MEM_P (op));
+
+ if (WORDS_BIG_ENDIAN)
+ op = adjust_address (op, SImode, 4);
+ output_address (XEXP (op, 0));
+ }
+ break;
+
+ case 'Q':
+ if (MEM_P (op))
+ {
+ HOST_WIDE_INT offset;
+
+ op = XEXP (op, 0);
+
+ if (REG_P (op))
+ offset = 0;
+ else if (GET_CODE (op) == PLUS)
+ {
+ rtx displacement;
+
+ if (REG_P (XEXP (op, 0)))
+ {
+ displacement = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ else
+ {
+ displacement = XEXP (op, 0);
+ op = XEXP (op, 1);
+ gcc_assert (REG_P (op));
+ }
+
+ gcc_assert (CONST_INT_P (displacement));
+ offset = INTVAL (displacement);
+ gcc_assert (offset >= 0);
+
+ fprintf (file, "%ld", offset);
+ }
+ else
+ gcc_unreachable ();
+
+ fprintf (file, "[");
+ rx_print_operand (file, op, 0);
+ fprintf (file, "].");
+
+ switch (GET_MODE_SIZE (GET_MODE (op)))
+ {
+ case 1:
+ gcc_assert (offset < 65535 * 1);
+ fprintf (file, "B");
+ break;
+ case 2:
+ gcc_assert (offset % 2 == 0);
+ gcc_assert (offset < 65535 * 2);
+ fprintf (file, "W");
+ break;
+ default:
+ gcc_assert (offset % 4 == 0);
+ gcc_assert (offset < 65535 * 4);
+ fprintf (file, "L");
+ break;
+ }
+ break;
+ }
+
+ /* Fall through. */
+
+ default:
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ /* Should be the scaled part of an
+ indexed register indirect address. */
+ {
+ rtx base = XEXP (op, 0);
+ rtx index = XEXP (op, 1);
+
+	    /* Check for a swapped index register and scaling factor.
+ Not sure if this can happen, but be prepared to handle it. */
+ if (CONST_INT_P (base) && REG_P (index))
+ {
+ rtx tmp = base;
+ base = index;
+ index = tmp;
+ }
+
+ gcc_assert (REG_P (base));
+ gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
+ gcc_assert (CONST_INT_P (index));
+	    /* Do not try to verify the value of the scale factor, as it
+	       is based on the mode of the MEM, not the mode of the MULT
+	       (which will always be SImode).  */
+ fprintf (file, "%s", reg_names [REGNO (base)]);
+ break;
+ }
+
+ case MEM:
+ output_address (XEXP (op, 0));
+ break;
+
+ case PLUS:
+ output_address (op);
+ break;
+
+ case REG:
+ gcc_assert (REGNO (op) < FIRST_PSEUDO_REGISTER);
+ fprintf (file, "%s", reg_names [REGNO (op)]);
+ break;
+
+ case SUBREG:
+ gcc_assert (subreg_regno (op) < FIRST_PSEUDO_REGISTER);
+ fprintf (file, "%s", reg_names [subreg_regno (op)]);
+ break;
+
+ /* This will only be single precision.... */
+ case CONST_DOUBLE:
+ {
+ unsigned long val;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, TARGET_AS100_SYNTAX ? "#0%lxH" : "#0x%lx", val);
+ break;
+ }
+
+ case CONST_INT:
+ fprintf (file, "#");
+ rx_print_integer (file, INTVAL (op));
+ break;
+
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ case CODE_LABEL:
+ case UNSPEC:
+ rx_print_operand_address (file, op);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+}
+
+/* Returns an assembler template for a move instruction. */
+
+char *
+rx_gen_move_template (rtx * operands, bool is_movu)
+{
+ static char template [64];
+ const char * extension = TARGET_AS100_SYNTAX ? ".L" : "";
+ const char * src_template;
+ const char * dst_template;
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ /* Decide which extension, if any, should be given to the move instruction. */
+ switch (CONST_INT_P (src) ? GET_MODE (dest) : GET_MODE (src))
+ {
+ case QImode:
+ /* The .B extension is not valid when
+ loading an immediate into a register. */
+ if (! REG_P (dest) || ! CONST_INT_P (src))
+ extension = ".B";
+ break;
+ case HImode:
+ if (! REG_P (dest) || ! CONST_INT_P (src))
+ /* The .W extension is not valid when
+ loading an immediate into a register. */
+ extension = ".W";
+ break;
+ case SFmode:
+ case SImode:
+ extension = ".L";
+ break;
+ case VOIDmode:
+ /* This mode is used by constants. */
+ break;
+ default:
+ debug_rtx (src);
+ gcc_unreachable ();
+ }
+
+ if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0)))
+ src_template = "%%gp(%A1)[r13]";
+ else
+ src_template = "%1";
+
+ if (MEM_P (dest) && rx_small_data_operand (XEXP (dest, 0)))
+ dst_template = "%%gp(%A0)[r13]";
+ else
+ dst_template = "%0";
+
+ sprintf (template, "%s%s\t%s, %s", is_movu ? "movu" : "mov",
+ extension, src_template, dst_template);
+ return template;
+}
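+
+/* For instance (illustrative): an SImode register-to-register copy
+   yields the template "mov.L %1, %0", and a QImode load from memory
+   yields "mov.B %1, %0" ("movu.B" when IS_MOVU is true).  */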
+
+/* Returns an assembler template for a conditional branch instruction. */
+
+const char *
+rx_gen_cond_branch_template (rtx condition, bool reversed)
+{
+ enum rtx_code code = GET_CODE (condition);
+
+ if ((cc_status.flags & CC_NO_OVERFLOW) && ! rx_float_compare_mode)
+ gcc_assert (code != GT && code != GE && code != LE && code != LT);
+
+ if ((cc_status.flags & CC_NO_CARRY) || rx_float_compare_mode)
+ gcc_assert (code != GEU && code != GTU && code != LEU && code != LTU);
+
+ if (reversed)
+ {
+ if (rx_float_compare_mode)
+ code = reverse_condition_maybe_unordered (code);
+ else
+ code = reverse_condition (code);
+ }
+
+ /* We do not worry about encoding the branch length here as GAS knows
+ how to choose the smallest version, and how to expand a branch that
+ is to a destination that is out of range. */
+
+ switch (code)
+ {
+ case UNEQ: return "bo\t1f\n\tbeq\t%0\n1:";
+ case LTGT: return "bo\t1f\n\tbne\t%0\n1:";
+ case UNLT: return "bo\t1f\n\tbn\t%0\n1:";
+ case UNGE: return "bo\t1f\n\tbpz\t%0\n1:";
+ case UNLE: return "bo\t1f\n\tbgt\t1f\n\tbra\t%0\n1:";
+ case UNGT: return "bo\t1f\n\tble\t1f\n\tbra\t%0\n1:";
+ case UNORDERED: return "bo\t%0";
+ case ORDERED: return "bno\t%0";
+
+ case LT: return rx_float_compare_mode ? "bn\t%0" : "blt\t%0";
+ case GE: return rx_float_compare_mode ? "bpz\t%0" : "bge\t%0";
+ case GT: return "bgt\t%0";
+ case LE: return "ble\t%0";
+ case GEU: return "bgeu\t%0";
+ case LTU: return "bltu\t%0";
+ case GTU: return "bgtu\t%0";
+ case LEU: return "bleu\t%0";
+ case EQ: return "beq\t%0";
+ case NE: return "bne\t%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return VALUE rounded up to the next ALIGNMENT boundary. */
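+/* For example, with ALIGNMENT == 4 the values 5 through 8 all round
+   up to 8.  ALIGNMENT must be a power of two for the mask arithmetic
+   below to work.  */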
+
+static inline unsigned int
+rx_round_up (unsigned int value, unsigned int alignment)
+{
+ alignment -= 1;
+ return (value + alignment) & (~ alignment);
+}
+
+/* Return the number of bytes in the argument registers
+ occupied by an argument of type TYPE and mode MODE. */
+
+unsigned int
+rx_function_arg_size (Mmode mode, const_tree type)
+{
+ unsigned int num_bytes;
+
+ num_bytes = (mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ return rx_round_up (num_bytes, UNITS_PER_WORD);
+}
+
+#define NUM_ARG_REGS 4
+#define MAX_NUM_ARG_BYTES (NUM_ARG_REGS * UNITS_PER_WORD)
+
+/* Return an RTL expression describing the register holding a function
+ parameter of mode MODE and type TYPE or NULL_RTX if the parameter should
+ be passed on the stack. CUM describes the previous parameters to the
+ function and NAMED is false if the parameter is part of a variable
+ parameter list, or the last named parameter before the start of a
+ variable parameter list. */
+
+rtx
+rx_function_arg (Fargs * cum, Mmode mode, const_tree type, bool named)
+{
+ unsigned int next_reg;
+ unsigned int bytes_so_far = *cum;
+ unsigned int size;
+ unsigned int rounded_size;
+
+ /* An exploded version of rx_function_arg_size. */
+ size = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+
+ rounded_size = rx_round_up (size, UNITS_PER_WORD);
+
+ /* Don't pass this arg via registers if there
+ are insufficient registers to hold all of it. */
+ if (rounded_size + bytes_so_far > MAX_NUM_ARG_BYTES)
+ return NULL_RTX;
+
+ /* Unnamed arguments and the last named argument in a
+ variadic function are always passed on the stack. */
+ if (!named)
+ return NULL_RTX;
+
+ /* Structures must occupy an exact number of registers,
+ otherwise they are passed on the stack. */
+ if ((type == NULL || AGGREGATE_TYPE_P (type))
+ && (size % UNITS_PER_WORD) != 0)
+ return NULL_RTX;
+
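+  /* Argument registers are r1..r4, so, for example, the second
+     word-sized argument (BYTES_SO_FAR == 4) is passed in r2.  */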
+ next_reg = (bytes_so_far / UNITS_PER_WORD) + 1;
+
+ return gen_rtx_REG (mode, next_reg);
+}
+
+/* Return an RTL describing where a function return value of type RET_TYPE
+ is held. */
+
+static rtx
+rx_function_value (const_tree ret_type,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (ret_type), FUNC_RETURN_REGNUM);
+}
+
+static bool
+rx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size;
+
+ if (TYPE_MODE (type) != BLKmode
+ && ! AGGREGATE_TYPE_P (type))
+ return false;
+
+ size = int_size_in_bytes (type);
+ /* Large structs and those whose size is not an
+ exact multiple of 4 are returned in memory. */
+ return size < 1
+ || size > 16
+ || (size % UNITS_PER_WORD) != 0;
+}
+
+static rtx
+rx_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, STRUCT_VAL_REGNUM);
+}
+
+static bool
+rx_return_in_msb (const_tree valtype)
+{
+ return TARGET_BIG_ENDIAN_DATA
+ && (AGGREGATE_TYPE_P (valtype) || TREE_CODE (valtype) == COMPLEX_TYPE);
+}
+
+/* Returns true if the provided function has the specified attribute. */
+
+static inline bool
+has_func_attr (const_tree decl, const char * func_attr)
+{
+ if (decl == NULL_TREE)
+ decl = current_function_decl;
+
+ return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE;
+}
+
+/* Returns true if the provided function has the "fast_interrupt" attribute. */
+
+static inline bool
+is_fast_interrupt_func (const_tree decl)
+{
+ return has_func_attr (decl, "fast_interrupt");
+}
+
+/* Returns true if the provided function has the "interrupt" attribute. */
+
+static inline bool
+is_interrupt_func (const_tree decl)
+{
+ return has_func_attr (decl, "interrupt");
+}
+
+/* Returns true if the provided function has the "naked" attribute. */
+
+static inline bool
+is_naked_func (const_tree decl)
+{
+ return has_func_attr (decl, "naked");
+}
+
+static bool use_fixed_regs = false;
+
+void
+rx_conditional_register_usage (void)
+{
+ static bool using_fixed_regs = false;
+
+ if (rx_small_data_limit > 0)
+ fixed_regs[GP_BASE_REGNUM] = call_used_regs [GP_BASE_REGNUM] = 1;
+
+ if (use_fixed_regs != using_fixed_regs)
+ {
+ static char saved_fixed_regs[FIRST_PSEUDO_REGISTER];
+ static char saved_call_used_regs[FIRST_PSEUDO_REGISTER];
+
+ if (use_fixed_regs)
+ {
+ unsigned int switched = 0;
+ unsigned int r;
+
+ /* This is for fast interrupt handlers. Any register in
+ the range r10 to r13 (inclusive) that is currently
+ marked as fixed is now a viable, call-saved register.
+ All other registers are fixed. */
+ memcpy (saved_fixed_regs, fixed_regs, sizeof fixed_regs);
+ memcpy (saved_call_used_regs, call_used_regs, sizeof call_used_regs);
+
+ for (r = 1; r < 10; r++)
+ fixed_regs[r] = call_used_regs[r] = 1;
+
+ for (r = 10; r <= 13; r++)
+ if (fixed_regs[r])
+ {
+ fixed_regs[r] = 0;
+ call_used_regs[r] = 1;
+ ++ switched;
+ }
+ else
+ {
+ fixed_regs[r] = 1;
+ call_used_regs[r] = 1;
+ }
+
+ fixed_regs[14] = call_used_regs[14] = 1;
+ fixed_regs[15] = call_used_regs[15] = 1;
+
+ if (switched == 0)
+ {
+ static bool warned = false;
+
+ if (! warned)
+ {
+ warning (0, "no fixed registers available "
+ "for use by fast interrupt handler");
+ warned = true;
+ }
+ }
+ }
+ else
+ {
+ /* Restore the normal register masks. */
+ memcpy (fixed_regs, saved_fixed_regs, sizeof fixed_regs);
+ memcpy (call_used_regs, saved_call_used_regs, sizeof call_used_regs);
+ }
+
+ using_fixed_regs = use_fixed_regs;
+ }
+}
+
+/* Perform any actions necessary before starting to compile FNDECL.
+ For the RX we use this to make sure that we have the correct
+ set of register masks selected. If FNDECL is NULL then we are
+ compiling top level things. */
+
+static void
+rx_set_current_function (tree fndecl)
+{
+ /* Remember the last target of rx_set_current_function. */
+ static tree rx_previous_fndecl;
+ bool prev_was_fast_interrupt;
+ bool current_is_fast_interrupt;
+
+ /* Only change the context if the function changes. This hook is called
+ several times in the course of compiling a function, and we don't want
+ to slow things down too much or call target_reinit when it isn't safe. */
+ if (fndecl == rx_previous_fndecl)
+ return;
+
+ prev_was_fast_interrupt
+ = rx_previous_fndecl
+ ? is_fast_interrupt_func (rx_previous_fndecl) : false;
+
+ current_is_fast_interrupt
+ = fndecl ? is_fast_interrupt_func (fndecl) : false;
+
+ if (prev_was_fast_interrupt != current_is_fast_interrupt)
+ {
+ use_fixed_regs = current_is_fast_interrupt;
+ target_reinit ();
+ }
+
+ rx_previous_fndecl = fndecl;
+}
+
+/* Typical stack layout should look like this after the function's prologue:
+
+ | |
+ -- ^
+ | | \ |
+ | | arguments saved | Increasing
+ | | on the stack | addresses
+ PARENT arg pointer -> | | /
+ -------------------------- ---- -------------------
+ CHILD |ret | return address
+ --
+ | | \
+ | | call saved
+ | | registers
+ | | /
+ --
+ | | \
+ | | local
+ | | variables
+ frame pointer -> | | /
+ --
+ | | \
+ | | outgoing | Decreasing
+ | | arguments | addresses
+ current stack pointer -> | | / |
+ -------------------------- ---- ------------------ V
+ | | */
+
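+/* Return the number of set bits in X, using the classic parallel
+   (SWAR) reduction: pairs, then nibbles, then bytes.  For example,
+   bit_count (0x00ff00ff) returns 16.  */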
+static unsigned int
+bit_count (unsigned int x)
+{
+ const unsigned int m1 = 0x55555555;
+ const unsigned int m2 = 0x33333333;
+ const unsigned int m4 = 0x0f0f0f0f;
+
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += x >> 8;
+
+ return (x + (x >> 16)) & 0x3f;
+}
+
+/* Returns either the lowest numbered and highest numbered registers that
+ occupy the call-saved area of the stack frame, if the registers are
+ stored as a contiguous block, or else a bitmask of the individual
+ registers if they are stored piecemeal.
+
+ Also computes the size of the frame and the size of the outgoing
+ arguments block (in bytes). */
+
+static void
+rx_get_stack_layout (unsigned int * lowest,
+ unsigned int * highest,
+ unsigned int * register_mask,
+ unsigned int * frame_size,
+ unsigned int * stack_size)
+{
+ unsigned int reg;
+ unsigned int low;
+ unsigned int high;
+ unsigned int fixed_reg = 0;
+ unsigned int save_mask;
+ unsigned int pushed_mask;
+ unsigned int unneeded_pushes;
+
+ if (is_naked_func (NULL_TREE)
+ || is_fast_interrupt_func (NULL_TREE))
+ {
+ /* Naked functions do not create their own stack frame.
+ Instead the programmer must do that for us.
+
+ Fast interrupt handlers use fixed registers that have
+	 been specially released to the function, so they do
+ not need or want a stack frame. */
+ * lowest = 0;
+ * highest = 0;
+ * register_mask = 0;
+ * frame_size = 0;
+ * stack_size = 0;
+ return;
+ }
+
+ for (save_mask = high = low = 0, reg = 1; reg < FIRST_PSEUDO_REGISTER; reg++)
+ {
+ if (df_regs_ever_live_p (reg)
+ && (! call_used_regs[reg]
+ /* Even call clobbered registered must
+ be pushed inside interrupt handlers. */
+ || is_interrupt_func (NULL_TREE)))
+ {
+ if (low == 0)
+ low = reg;
+ high = reg;
+
+ save_mask |= 1 << reg;
+ }
+
+ /* Remember if we see a fixed register
+ after having found the low register. */
+ if (low != 0 && fixed_reg == 0 && fixed_regs [reg])
+ fixed_reg = reg;
+ }
+
+  /* Decide if it would be faster to fill in the call-saved area of the stack
+ frame using multiple PUSH instructions instead of a single PUSHM
+ instruction.
+
+ SAVE_MASK is a bitmask of the registers that must be stored in the
+ call-save area. PUSHED_MASK is a bitmask of the registers that would
+ be pushed into the area if we used a PUSHM instruction. UNNEEDED_PUSHES
+ is a bitmask of those registers in pushed_mask that are not in
+ save_mask.
+
+ We use a simple heuristic that says that it is better to use
+ multiple PUSH instructions if the number of unnecessary pushes is
+ greater than the number of necessary pushes.
+
+ We also use multiple PUSH instructions if there are any fixed registers
+ between LOW and HIGH. The only way that this can happen is if the user
+ has specified --fixed-<reg-name> on the command line and in such
+ circumstances we do not want to touch the fixed registers at all.
+
+ FIXME: Is it worth improving this heuristic ? */
+ pushed_mask = (-1 << low) & ~(-1 << (high + 1));
+  unneeded_pushes = pushed_mask & ~ save_mask;
+
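+  /* For example, if only r1 and r7 need saving then LOW == 1 and
+     HIGH == 7, so PUSHED_MASK covers r1-r7: five unneeded pushes
+     against two needed ones, and so, when optimizing for speed,
+     individual PUSH instructions are used instead.  */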
+ if ((fixed_reg && fixed_reg <= high)
+ || (optimize_function_for_speed_p (cfun)
+ && bit_count (save_mask) < bit_count (unneeded_pushes)))
+ {
+ /* Use multiple pushes. */
+ * lowest = 0;
+ * highest = 0;
+ * register_mask = save_mask;
+ }
+ else
+ {
+ /* Use one push multiple instruction. */
+ * lowest = low;
+ * highest = high;
+ * register_mask = 0;
+ }
+
+ * frame_size = rx_round_up
+ (get_frame_size (), STACK_BOUNDARY / BITS_PER_UNIT);
+
+ if (crtl->args.size > 0)
+ * frame_size += rx_round_up
+ (crtl->args.size, STACK_BOUNDARY / BITS_PER_UNIT);
+
+ * stack_size = rx_round_up
+ (crtl->outgoing_args_size, STACK_BOUNDARY / BITS_PER_UNIT);
+}
+
+/* Generate a PUSHM instruction that matches the given operands. */
+
+void
+rx_emit_stack_pushm (rtx * operands)
+{
+ HOST_WIDE_INT last_reg;
+ rtx first_push;
+
+ gcc_assert (CONST_INT_P (operands[0]));
+ last_reg = (INTVAL (operands[0]) / UNITS_PER_WORD) - 1;
+
+ gcc_assert (GET_CODE (operands[1]) == PARALLEL);
+ first_push = XVECEXP (operands[1], 0, 1);
+ gcc_assert (SET_P (first_push));
+ first_push = SET_SRC (first_push);
+ gcc_assert (REG_P (first_push));
+
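+  /* Element 1 of the PARALLEL is the store of the highest register in
+     the block (see gen_rx_store_vector), so, for example, a 16 byte
+     push whose first store is of r9 is printed as "pushm r6-r9".  */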
+ asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n",
+ reg_names [REGNO (first_push) - last_reg],
+ reg_names [REGNO (first_push)]);
+}
+
+/* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */
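+/* For example, for LOW == 6 and HIGH == 9 the vector contains an
+   SP -= 16 adjustment followed by stores of r9 at [SP - 4] down to
+   r6 at [SP - 16].  */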
+
+static rtx
+gen_rx_store_vector (unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int count = (high - low) + 2;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (SImode, stack_pointer_rtx,
+ gen_rtx_MINUS (SImode, stack_pointer_rtx,
+ GEN_INT ((count - 1) * UNITS_PER_WORD)));
+
+ for (i = 0; i < count - 1; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (SImode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_MINUS (SImode, stack_pointer_rtx,
+ GEN_INT ((i + 1) * UNITS_PER_WORD))),
+ gen_rtx_REG (SImode, high - i));
+ return vector;
+}
+
+/* Mark INSN as being frame related. If it is a PARALLEL
+ then mark each element as being frame related as well. */
+
+static void
+mark_frame_related (rtx insn)
+{
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = PATTERN (insn);
+
+ if (GET_CODE (insn) == PARALLEL)
+ {
+ unsigned int i;
+
+ for (i = 0; i < XVECLEN (insn, 0); i++)
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1;
+ }
+}
+
+void
+rx_expand_prologue (void)
+{
+ unsigned int stack_size;
+ unsigned int frame_size;
+ unsigned int mask;
+ unsigned int low;
+ unsigned int high;
+ unsigned int reg;
+ rtx insn;
+
+ /* Naked functions use their own, programmer provided prologues. */
+ if (is_naked_func (NULL_TREE)
+ /* Fast interrupt functions never use the stack. */
+ || is_fast_interrupt_func (NULL_TREE))
+ return;
+
+ rx_get_stack_layout (& low, & high, & mask, & frame_size, & stack_size);
+
+ /* If we use any of the callee-saved registers, save them now. */
+ if (mask)
+ {
+ /* Push registers in reverse order. */
+ for (reg = FIRST_PSEUDO_REGISTER; reg --;)
+ if (mask & (1 << reg))
+ {
+ insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg)));
+ mark_frame_related (insn);
+ }
+ }
+ else if (low)
+ {
+ if (high == low)
+ insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
+ else
+ insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1)
+ * UNITS_PER_WORD),
+ gen_rx_store_vector (low, high)));
+ mark_frame_related (insn);
+ }
+
+ if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER)
+ {
+ unsigned int acc_high, acc_low;
+
+ /* Interrupt handlers have to preserve the accumulator
+ register if so requested by the user. Use the first
+	 two pushed registers as intermediaries.  */
+ if (mask)
+ {
+ acc_low = acc_high = 0;
+
+ for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++)
+ if (mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high != 0);
+
+ /* Note - the bottom 16 bits of the accumulator are inaccessible.
+ We just assume that they are zero. */
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high <= high);
+
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_store_vector (acc_low, acc_high)));
+ }
+
+ frame_size += 2 * UNITS_PER_WORD;
+ }
+
+ /* If needed, set up the frame pointer. */
+ if (frame_pointer_needed)
+ {
+ if (frame_size)
+ insn = emit_insn (gen_addsi3 (frame_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) frame_size)));
+ else
+ insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ insn = NULL_RTX;
+
+ /* Allocate space for the outgoing args.
+ If the stack frame has not already been set up then handle this as well. */
+ if (stack_size)
+ {
+ if (frame_size)
+ {
+ if (frame_pointer_needed)
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT)
+ stack_size)));
+ else
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT)
+ (frame_size + stack_size))));
+ }
+ else
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) stack_size)));
+ }
+ else if (frame_size)
+ {
+ if (! frame_pointer_needed)
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) frame_size)));
+ else
+ insn = emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ }
+
+ if (insn != NULL_RTX)
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+static void
+rx_output_function_prologue (FILE * file,
+ HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
+{
+ if (is_fast_interrupt_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n");
+
+ if (is_interrupt_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Interrupt Handler\n");
+
+ if (is_naked_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Naked Function\n");
+
+ if (cfun->static_chain_decl != NULL)
+ asm_fprintf (file, "\t; Note: Nested function declared "
+ "inside another function.\n");
+
+ if (crtl->calls_eh_return)
+ asm_fprintf (file, "\t; Note: Calls __builtin_eh_return.\n");
+}
+
+/* Generate a POPM or RTSD instruction that matches the given operands. */
+
+void
+rx_emit_stack_popm (rtx * operands, bool is_popm)
+{
+ HOST_WIDE_INT stack_adjust;
+ HOST_WIDE_INT last_reg;
+ rtx first_push;
+
+ gcc_assert (CONST_INT_P (operands[0]));
+ stack_adjust = INTVAL (operands[0]);
+
+ gcc_assert (GET_CODE (operands[1]) == PARALLEL);
+ last_reg = XVECLEN (operands[1], 0) - (is_popm ? 2 : 3);
+
+ first_push = XVECEXP (operands[1], 0, 1);
+ gcc_assert (SET_P (first_push));
+ first_push = SET_DEST (first_push);
+ gcc_assert (REG_P (first_push));
+
+ if (is_popm)
+ asm_fprintf (asm_out_file, "\tpopm\t%s-%s\n",
+ reg_names [REGNO (first_push)],
+ reg_names [REGNO (first_push) + last_reg]);
+ else
+ asm_fprintf (asm_out_file, "\trtsd\t#%d, %s-%s\n",
+ (int) stack_adjust,
+ reg_names [REGNO (first_push)],
+ reg_names [REGNO (first_push) + last_reg]);
+}
+
+/* Generate a PARALLEL which will satisfy the rx_rtsd_vector predicate. */
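+/* For example, for ADJUST == 24, LOW == 6 and HIGH == 9 the vector
+   describes an SP += 24 adjustment, loads of r6..r9 from successive
+   words at the incoming SP, and a return.  */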
+
+static rtx
+gen_rx_rtsd_vector (unsigned int adjust, unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int bias = 3;
+ unsigned int count = (high - low) + bias;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (SImode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, adjust));
+
+ for (i = 0; i < count - 2; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (SImode,
+ gen_rtx_REG (SImode, low + i),
+ gen_rtx_MEM (SImode,
+ i == 0 ? stack_pointer_rtx
+ : plus_constant (stack_pointer_rtx,
+ i * UNITS_PER_WORD)));
+
+ XVECEXP (vector, 0, count - 1) = gen_rtx_RETURN (VOIDmode);
+
+ return vector;
+}
+
+/* Generate a PARALLEL which will satisfy the rx_load_multiple_vector predicate. */
+
+static rtx
+gen_rx_popm_vector (unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int count = (high - low) + 2;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (SImode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ (count - 1) * UNITS_PER_WORD));
+
+ for (i = 0; i < count - 1; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (SImode,
+ gen_rtx_REG (SImode, low + i),
+ gen_rtx_MEM (SImode,
+ i == 0 ? stack_pointer_rtx
+ : plus_constant (stack_pointer_rtx,
+ i * UNITS_PER_WORD)));
+
+ return vector;
+}
+
+void
+rx_expand_epilogue (bool is_sibcall)
+{
+ unsigned int low;
+ unsigned int high;
+ unsigned int frame_size;
+ unsigned int stack_size;
+ unsigned int register_mask;
+ unsigned int regs_size;
+ unsigned int reg;
+ unsigned HOST_WIDE_INT total_size;
+
+ if (is_naked_func (NULL_TREE))
+ {
+ /* Naked functions use their own, programmer provided epilogues.
+ But, in order to keep gcc happy we have to generate some kind of
+ epilogue RTL. */
+ emit_jump_insn (gen_naked_return ());
+ return;
+ }
+
+ rx_get_stack_layout (& low, & high, & register_mask,
+ & frame_size, & stack_size);
+
+ total_size = frame_size + stack_size;
+ regs_size = ((high - low) + 1) * UNITS_PER_WORD;
+
+ /* See if we are unable to use the special stack frame deconstruct and
+ return instructions. In most cases we can use them, but the exceptions
+ are:
+
+ - Sibling calling functions deconstruct the frame but do not return to
+ their caller. Instead they branch to their sibling and allow their
+ return instruction to return to this function's parent.
+
+ - Fast and normal interrupt handling functions have to use special
+ return instructions.
+
+ - Functions where we have pushed a fragmented set of registers into the
+ call-save area must have the same set of registers popped. */
+ if (is_sibcall
+ || is_fast_interrupt_func (NULL_TREE)
+ || is_interrupt_func (NULL_TREE)
+ || register_mask)
+ {
+ /* Cannot use the special instructions - deconstruct by hand. */
+ if (total_size)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (total_size)));
+
+ if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER)
+ {
+ unsigned int acc_low, acc_high;
+
+ /* Reverse the saving of the accumulator register onto the stack.
+ Note we must adjust the saved "low" accumulator value as it
+ is really the middle 32-bits of the accumulator. */
+ if (register_mask)
+ {
+ acc_low = acc_high = 0;
+ for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++)
+ if (register_mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+ emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_popm_vector (acc_low, acc_high)));
+ }
+
+ emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low),
+ gen_rtx_REG (SImode, acc_low),
+ GEN_INT (16)));
+ emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high)));
+ }
+
+ if (register_mask)
+ {
+ for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg ++)
+ if (register_mask & (1 << reg))
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg)));
+ }
+ else if (low)
+ {
+ if (high == low)
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
+ else
+ emit_insn (gen_stack_popm (GEN_INT (regs_size),
+ gen_rx_popm_vector (low, high)));
+ }
+
+ if (is_fast_interrupt_func (NULL_TREE))
+ emit_jump_insn (gen_fast_interrupt_return ());
+ else if (is_interrupt_func (NULL_TREE))
+ emit_jump_insn (gen_exception_return ());
+ else if (! is_sibcall)
+ emit_jump_insn (gen_simple_return ());
+
+ return;
+ }
+
+ /* If we allocated space on the stack, free it now. */
+ if (total_size)
+ {
+ unsigned HOST_WIDE_INT rtsd_size;
+
+ /* See if we can use the RTSD instruction. */
+ rtsd_size = total_size + regs_size;
+ if (rtsd_size < 1024 && (rtsd_size % 4) == 0)
+ {
+ if (low)
+ emit_jump_insn (gen_pop_and_return
+ (GEN_INT (rtsd_size),
+ gen_rx_rtsd_vector (rtsd_size, low, high)));
+ else
+ emit_jump_insn (gen_deallocate_and_return (GEN_INT (total_size)));
+
+ return;
+ }
+
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (total_size)));
+ }
+
+ if (low)
+ emit_jump_insn (gen_pop_and_return (GEN_INT (regs_size),
+ gen_rx_rtsd_vector (regs_size,
+ low, high)));
+ else
+ emit_jump_insn (gen_simple_return ());
+}
+
+
+/* Compute the offset (in bytes) between FROM (arg pointer
+ or frame pointer) and TO (frame pointer or stack pointer).
+ See ASCII art comment at the start of rx_expand_prologue
+ for more information. */
+
+int
+rx_initial_elimination_offset (int from, int to)
+{
+ unsigned int low;
+ unsigned int high;
+ unsigned int frame_size;
+ unsigned int stack_size;
+ unsigned int mask;
+
+ rx_get_stack_layout (& low, & high, & mask, & frame_size, & stack_size);
+
+ if (from == ARG_POINTER_REGNUM)
+ {
+ /* Extend the computed size of the stack frame to
+ include the registers pushed in the prologue. */
+ if (low)
+ frame_size += ((high - low) + 1) * UNITS_PER_WORD;
+ else
+ frame_size += bit_count (mask) * UNITS_PER_WORD;
+
+ /* Remember to include the return address. */
+ frame_size += 1 * UNITS_PER_WORD;
+
+ if (to == FRAME_POINTER_REGNUM)
+ return frame_size;
+
+ gcc_assert (to == STACK_POINTER_REGNUM);
+ return frame_size + stack_size;
+ }
+
+ gcc_assert (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM);
+ return stack_size;
+}
+
+/* Update the status of the condition
+ codes (cc0) based on the given INSN. */
+
+void
+rx_notice_update_cc (rtx body, rtx insn)
+{
+ switch (get_attr_cc (insn))
+ {
+ case CC_NONE:
+ /* Insn does not affect cc0 at all. */
+ break;
+ case CC_CLOBBER:
+ /* Insn doesn't leave cc0 in a usable state. */
+ CC_STATUS_INIT;
+ break;
+ case CC_SET_ZSOC:
+ /* The insn sets all the condition code bits. */
+ CC_STATUS_INIT;
+ cc_status.value1 = SET_SRC (body);
+ break;
+ case CC_SET_ZSO:
+ /* Insn sets the Z,S and O flags, but not the C flag. */
+ CC_STATUS_INIT;
+ cc_status.flags |= CC_NO_CARRY;
+ /* Do not set the value1 field in this case. The final_scan_insn()
+ function naively believes that if cc_status.value1 is set then
+ it can eliminate *any* comparison against that value, even if
+ the type of comparison cannot be satisfied by the range of flag
+ bits being set here. See gcc.c-torture/execute/20041210-1.c
+ for an example of this in action. */
+ break;
+ case CC_SET_ZS:
+ /* Insn sets the Z and S flags, but not the O or C flags. */
+ CC_STATUS_INIT;
+ cc_status.flags |= (CC_NO_CARRY | CC_NO_OVERFLOW);
+ /* See comment above regarding cc_status.value1. */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Decide if a variable should go into one of the small data sections. */
+
+static bool
+rx_in_small_data (const_tree decl)
+{
+ int size;
+ const_tree section;
+
+ if (rx_small_data_limit == 0)
+ return false;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+
+ /* We do not put read-only variables into a small data area because
+ they would be placed with the other read-only sections, far away
+ from the read-write data sections, and we only have one small
+ data area pointer.
+     Similarly commons are placed in the .bss section, which might be
+     far away from (and out of alignment with respect to) the .data
+     section.  */
+ if (TREE_READONLY (decl) || DECL_COMMON (decl))
+ return false;
+
+ section = DECL_SECTION_NAME (decl);
+ if (section)
+ {
+ const char * const name = TREE_STRING_POINTER (section);
+
+ return (strcmp (name, "D_2") == 0) || (strcmp (name, "B_2") == 0);
+ }
+
+ size = int_size_in_bytes (TREE_TYPE (decl));
+
+ return (size > 0) && (size <= rx_small_data_limit);
+}
+
+/* Return a section for X.
+ The only special thing we do here is to honor small data. */
+
+static section *
+rx_select_rtx_section (enum machine_mode mode,
+ rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (rx_small_data_limit > 0
+ && GET_MODE_SIZE (mode) <= rx_small_data_limit
+ && align <= (unsigned HOST_WIDE_INT) rx_small_data_limit * BITS_PER_UNIT)
+ return sdata_section;
+
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+static section *
+rx_select_section (tree decl,
+ int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if (rx_small_data_limit > 0)
+ {
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_SDATA: return sdata_section;
+ case SECCAT_SBSS: return sbss_section;
+ case SECCAT_SRODATA:
+	  /* Fall through.  We do not put small read-only data into
+	     the C_2 section because we do not use that section: it is
+	     located with the other read-only data sections, far away
+	     from the read-write data sections, and we only have one
+	     small data pointer (r13).  */
+ default:
+ break;
+ }
+ }
+
+ /* If we are supporting the Renesas assembler
+ we cannot use mergeable sections. */
+ if (TARGET_AS100_SYNTAX)
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_RODATA_MERGE_CONST:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_STR:
+ return readonly_data_section;
+
+ default:
+ break;
+ }
+
+ return default_elf_select_section (decl, reloc, align);
+}
+
+enum rx_builtin
+{
+ RX_BUILTIN_BRK,
+ RX_BUILTIN_CLRPSW,
+ RX_BUILTIN_INT,
+ RX_BUILTIN_MACHI,
+ RX_BUILTIN_MACLO,
+ RX_BUILTIN_MULHI,
+ RX_BUILTIN_MULLO,
+ RX_BUILTIN_MVFACHI,
+ RX_BUILTIN_MVFACMI,
+ RX_BUILTIN_MVFC,
+ RX_BUILTIN_MVTACHI,
+ RX_BUILTIN_MVTACLO,
+ RX_BUILTIN_MVTC,
+ RX_BUILTIN_MVTIPL,
+ RX_BUILTIN_RACW,
+ RX_BUILTIN_REVW,
+ RX_BUILTIN_RMPA,
+ RX_BUILTIN_ROUND,
+ RX_BUILTIN_SAT,
+ RX_BUILTIN_SETPSW,
+ RX_BUILTIN_WAIT,
+ RX_BUILTIN_max
+};
+
+static void
+rx_init_builtins (void)
+{
+#define ADD_RX_BUILTIN1(UC_NAME, LC_NAME, RET_TYPE, ARG_TYPE) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE##_type_node, \
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_RX_BUILTIN2(UC_NAME, LC_NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE1##_type_node,\
+ ARG_TYPE2##_type_node,\
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_RX_BUILTIN3(UC_NAME,LC_NAME,RET_TYPE,ARG_TYPE1,ARG_TYPE2,ARG_TYPE3) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE1##_type_node,\
+ ARG_TYPE2##_type_node,\
+ ARG_TYPE3##_type_node,\
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
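+  /* For example, ADD_RX_BUILTIN1 (BRK, "brk", void, void) registers
+     __builtin_rx_brk with prototype void (void) under the function
+     code RX_BUILTIN_BRK.  */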
+ ADD_RX_BUILTIN1 (BRK, "brk", void, void);
+ ADD_RX_BUILTIN1 (CLRPSW, "clrpsw", void, integer);
+ ADD_RX_BUILTIN1 (SETPSW, "setpsw", void, integer);
+ ADD_RX_BUILTIN1 (INT, "int", void, integer);
+ ADD_RX_BUILTIN2 (MACHI, "machi", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MACLO, "maclo", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MULHI, "mulhi", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MULLO, "mullo", void, intSI, intSI);
+ ADD_RX_BUILTIN1 (MVFACHI, "mvfachi", intSI, void);
+ ADD_RX_BUILTIN1 (MVFACMI, "mvfacmi", intSI, void);
+ ADD_RX_BUILTIN1 (MVTACHI, "mvtachi", void, intSI);
+ ADD_RX_BUILTIN1 (MVTACLO, "mvtaclo", void, intSI);
+ ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void);
+ ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer);
+ ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer);
+ ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer);
+ ADD_RX_BUILTIN1 (RACW, "racw", void, integer);
+ ADD_RX_BUILTIN1 (ROUND, "round", intSI, float);
+ ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI);
+ ADD_RX_BUILTIN1 (SAT, "sat", intSI, intSI);
+ ADD_RX_BUILTIN1 (WAIT, "wait", void, void);
+}
+
+static rtx
+rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg)
+{
+ if (reg && ! REG_P (arg))
+ arg = force_reg (SImode, arg);
+
+ emit_insn (gen_func (arg));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mvtc (tree exp)
+{
+ rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ if (! CONST_INT_P (arg1))
+ return NULL_RTX;
+
+ if (! REG_P (arg2))
+ arg2 = force_reg (SImode, arg2);
+
+ emit_insn (gen_mvtc (arg1, arg2));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mvfc (tree t_arg, rtx target)
+{
+ rtx arg = expand_normal (t_arg);
+
+ if (! CONST_INT_P (arg))
+ return NULL_RTX;
+
+ if (! REG_P (target))
+ target = force_reg (SImode, target);
+
+ emit_insn (gen_mvfc (target, arg));
+
+ return target;
+}
+
+static rtx
+rx_expand_builtin_mvtipl (rtx arg)
+{
+ /* The RX610 does not support the MVTIPL instruction. */
+ if (rx_cpu_type == RX610)
+ return NULL_RTX;
+
+  if (! CONST_INT_P (arg) || ! IN_RANGE (INTVAL (arg), 0, (1 << 4) - 1))
+ return NULL_RTX;
+
+ emit_insn (gen_mvtipl (arg));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx))
+{
+ rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ if (! REG_P (arg1))
+ arg1 = force_reg (SImode, arg1);
+
+ if (! REG_P (arg2))
+ arg2 = force_reg (SImode, arg2);
+
+ emit_insn (gen_func (arg1, arg2));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_int_builtin_1_arg (rtx arg,
+ rtx target,
+ rtx (* gen_func)(rtx, rtx),
+ bool mem_ok)
+{
+ if (! REG_P (arg))
+    if (! mem_ok || ! MEM_P (arg))
+ arg = force_reg (SImode, arg);
+
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_func (target, arg));
+
+ return target;
+}
+
+static rtx
+rx_expand_int_builtin_0_arg (rtx target, rtx (* gen_func)(rtx))
+{
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_func (target));
+
+ return target;
+}
+
+static rtx
+rx_expand_builtin_round (rtx arg, rtx target)
+{
+ if ((! REG_P (arg) && ! MEM_P (arg))
+ || GET_MODE (arg) != SFmode)
+ arg = force_reg (SFmode, arg);
+
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lrintsf2 (target, arg));
+
+ return target;
+}
+
+static rtx
+rx_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg = CALL_EXPR_ARGS (exp) ? CALL_EXPR_ARG (exp, 0) : NULL_TREE;
+ rtx op = arg ? expand_normal (arg) : NULL_RTX;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case RX_BUILTIN_BRK: emit_insn (gen_brk ()); return NULL_RTX;
+ case RX_BUILTIN_CLRPSW: return rx_expand_void_builtin_1_arg
+ (op, gen_clrpsw, false);
+ case RX_BUILTIN_SETPSW: return rx_expand_void_builtin_1_arg
+ (op, gen_setpsw, false);
+ case RX_BUILTIN_INT: return rx_expand_void_builtin_1_arg
+ (op, gen_int, false);
+ case RX_BUILTIN_MACHI: return rx_expand_builtin_mac (exp, gen_machi);
+ case RX_BUILTIN_MACLO: return rx_expand_builtin_mac (exp, gen_maclo);
+ case RX_BUILTIN_MULHI: return rx_expand_builtin_mac (exp, gen_mulhi);
+ case RX_BUILTIN_MULLO: return rx_expand_builtin_mac (exp, gen_mullo);
+ case RX_BUILTIN_MVFACHI: return rx_expand_int_builtin_0_arg
+ (target, gen_mvfachi);
+ case RX_BUILTIN_MVFACMI: return rx_expand_int_builtin_0_arg
+ (target, gen_mvfacmi);
+ case RX_BUILTIN_MVTACHI: return rx_expand_void_builtin_1_arg
+ (op, gen_mvtachi, true);
+ case RX_BUILTIN_MVTACLO: return rx_expand_void_builtin_1_arg
+ (op, gen_mvtaclo, true);
+ case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX;
+ case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target);
+ case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp);
+ case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op);
+ case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg
+ (op, gen_racw, false);
+ case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target);
+ case RX_BUILTIN_REVW: return rx_expand_int_builtin_1_arg
+ (op, target, gen_revw, false);
+ case RX_BUILTIN_SAT: return rx_expand_int_builtin_1_arg
+ (op, target, gen_sat, false);
+ case RX_BUILTIN_WAIT: emit_insn (gen_wait ()); return NULL_RTX;
+
+ default:
+ internal_error ("bad builtin code");
+ break;
+ }
+
+ return NULL_RTX;
+}
+
+/* Place an element into a constructor or destructor section.
+ Like default_ctor_section_asm_out_constructor in varasm.c
+ except that it uses .init_array (or .fini_array) and it
+ handles constructor priorities. */
+
+static void
+rx_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
+{
+ section * s;
+
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ char buf[18];
+
+ sprintf (buf, "%s.%.5u",
+ is_ctor ? ".init_array" : ".fini_array",
+ priority);
+ s = get_section (buf, SECTION_WRITE, NULL_TREE);
+ }
+ else if (is_ctor)
+ s = ctors_section;
+ else
+ s = dtors_section;
+
+ switch_to_section (s);
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+static void
+rx_elf_asm_constructor (rtx symbol, int priority)
+{
+ rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */true);
+}
+
+static void
+rx_elf_asm_destructor (rtx symbol, int priority)
+{
+ rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false);
+}
+
+/* Check "fast_interrupt", "interrupt" and "naked" attributes. */
+
+static tree
+rx_handle_func_attribute (tree * node,
+ tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ gcc_assert (DECL_P (* node));
+ gcc_assert (args == NULL_TREE);
+
+ if (TREE_CODE (* node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ * no_add_attrs = true;
+ }
+
+ /* FIXME: We ought to check for conflicting attributes. */
+
+ /* FIXME: We ought to check that the interrupt and exception
+ handler attributes have been applied to void functions. */
+ return NULL_TREE;
+}
+
+/* Table of RX specific attributes. */
+const struct attribute_spec rx_attribute_table[] =
+{
+ /* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */
+ { "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
+ { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
+ { "naked", 0, 0, true, false, false, rx_handle_func_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+static bool
+rx_allocate_stack_slots_for_args (void)
+{
+ /* Naked functions should not allocate stack slots for arguments. */
+ return ! is_naked_func (NULL_TREE);
+}
+
+static bool
+rx_func_attr_inlinable (const_tree decl)
+{
+ return ! is_fast_interrupt_func (decl)
+ && ! is_interrupt_func (decl)
+ && ! is_naked_func (decl);
+}
+
+static void
+rx_file_start (void)
+{
+ if (! TARGET_AS100_SYNTAX)
+ default_file_start ();
+}
+
+static bool
+rx_is_ms_bitfield_layout (const_tree record_type ATTRIBUTE_UNUSED)
+{
+ return TRUE;
+}
+
+/* Try to generate code for the "insv" pattern which inserts bits
+ into a word.
+ operands[0] => Location to be altered.
+ operands[1] => Number of bits to change.
+ operands[2] => Starting bit.
+ operands[3] => Value to insert.
+ Returns TRUE if successful, FALSE otherwise. */
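+/* For example, inserting the constant 1 into bit 5 of a register
+   expands through gen_bitset, and inserting 0 through gen_bitclr.  */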
+
+bool
+rx_expand_insv (rtx * operands)
+{
+ if (INTVAL (operands[1]) != 1
+ || ! CONST_INT_P (operands[3]))
+ return false;
+
+ if (MEM_P (operands[0])
+ && INTVAL (operands[2]) > 7)
+ return false;
+
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ if (MEM_P (operands[0]))
+ emit_insn (gen_bitclr_in_memory (operands[0], operands[0],
+ operands[2]));
+ else
+ emit_insn (gen_bitclr (operands[0], operands[0], operands[2]));
+ break;
+ case 1:
+ case -1:
+ if (MEM_P (operands[0]))
+ emit_insn (gen_bitset_in_memory (operands[0], operands[0],
+ operands[2]));
+ else
+ emit_insn (gen_bitset (operands[0], operands[0], operands[2]));
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+/* Returns true if X is a legitimate constant for an immediate
+ operand on the RX. X is already known to satisfy CONSTANT_P. */
+
+bool
+rx_is_legitimate_constant (rtx x)
+{
+ HOST_WIDE_INT val;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (! CONST_INT_P (XEXP (x, 1)))
+ return false;
+
+ /* GCC would not pass us CONST_INT + CONST_INT so we
+ know that we have {SYMBOL|LABEL} + CONST_INT. */
+ x = XEXP (x, 0);
+ gcc_assert (! CONST_INT_P (x));
+ }
+
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return true;
+
+ /* One day we may have to handle UNSPEC constants here. */
+ default:
+ /* FIXME: Can this ever happen ? */
+ abort ();
+ return false;
+ }
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return true;
+ case CONST_DOUBLE:
+ return rx_max_constant_size == 0;
+ case CONST_VECTOR:
+ return false;
+ default:
+ gcc_assert (CONST_INT_P (x));
+ break;
+ }
+
+ if (rx_max_constant_size == 0)
+ /* If there is no constraint on the size of constants
+ used as operands, then any value is legitimate. */
+ return true;
+
+ val = INTVAL (x);
+
+ /* rx_max_constant_size specifies the maximum number
+ of bytes that can be used to hold a signed value. */
+ return IN_RANGE (val, (-1 << (rx_max_constant_size * 8)),
+ ( 1 << (rx_max_constant_size * 8)));
+}
+
+/* This is a tri-state variable. The default value of 0 means that the user
+ has specified neither -mfpu nor -mnofpu on the command line. In this case
+ the selection of RX FPU instructions is entirely based upon the size of
+ the floating point object and whether unsafe math optimizations were
+ enabled. If 32-bit doubles have been enabled then both floats and doubles
+ can make use of FPU instructions, otherwise only floats may do so.
+
+ If the value is 1 then the user has specified -mfpu and the FPU
+ instructions should be used. Unsafe math optimizations will automatically
+ be enabled and doubles set to 32-bits. If the value is -1 then -mnofpu
+ has been specified and FPU instructions will not be used, even if unsafe
+ math optimizations have been enabled. */
+int rx_enable_fpu = 0;
+
+/* Extra processing for target specific command line options. */
+
+static bool
+rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ /* -mfpu enables the use of RX FPU instructions. This implies the use
+       of 32-bit doubles and also the enabling of fast math optimizations,
+       since the RX FPU instructions are not IEEE compliant.  The -mnofpu
+       option disables the use of RX FPU instructions, but does not place
+       any constraints on the size of doubles or the use of fast math
+       optimizations.
+
+ The selection of 32-bit vs 64-bit doubles is handled by the setting
+ of the 32BIT_DOUBLES mask in the rx.opt file. Enabling fast math
+ optimizations is performed in OVERRIDE_OPTIONS since if it was done
+ here it could be overridden by a -fno-fast-math option specified
+ *earlier* on the command line. (Target specific options are
+ processed before generic ones). */
+ case OPT_fpu:
+ rx_enable_fpu = 1;
+ break;
+
+ case OPT_nofpu:
+ rx_enable_fpu = -1;
+ break;
+
+ case OPT_mint_register_:
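+      /* The fall-throughs below mean that -mint-register=N reserves
+	 the top N registers of the range r10..r13; for example, a
+	 value of 2 fixes r12 and r13.  */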
+ switch (value)
+ {
+ case 4:
+ fixed_regs[10] = call_used_regs [10] = 1;
+ /* Fall through. */
+ case 3:
+ fixed_regs[11] = call_used_regs [11] = 1;
+ /* Fall through. */
+ case 2:
+ fixed_regs[12] = call_used_regs [12] = 1;
+ /* Fall through. */
+ case 1:
+ fixed_regs[13] = call_used_regs [13] = 1;
+ /* Fall through. */
+ case 0:
+ return true;
+ default:
+ return false;
+ }
+ break;
+
+ case OPT_mmax_constant_size_:
+      /* Make sure that the -mmax-constant-size option is in range.  */
+ return IN_RANGE (value, 0, 4);
+
+ case OPT_mcpu_:
+ case OPT_patch_:
+ if (strcasecmp (arg, "RX610") == 0)
+ rx_cpu_type = RX610;
+ /* FIXME: Should we check for non-RX cpu names here ? */
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static int
+rx_address_cost (rtx addr, bool speed)
+{
+ rtx a, b;
+
+ if (GET_CODE (addr) != PLUS)
+ return COSTS_N_INSNS (1);
+
+ a = XEXP (addr, 0);
+ b = XEXP (addr, 1);
+
+ if (REG_P (a) && REG_P (b))
+ /* Try to discourage REG+REG addressing as it keeps two registers live. */
+ return COSTS_N_INSNS (4);
+
+ if (speed)
+ /* [REG+OFF] is just as fast as [REG]. */
+ return COSTS_N_INSNS (1);
+
+ if (CONST_INT_P (b)
+ && ((INTVAL (b) > 128) || INTVAL (b) < -127))
+ /* Try to discourage REG + <large OFF> when optimizing for size. */
+ return COSTS_N_INSNS (2);
+
+ return COSTS_N_INSNS (1);
+}
+
+static bool
+rx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ /* We can always eliminate to the frame pointer.
+ We can eliminate to the stack pointer unless a frame
+ pointer is needed. */
+
+ return to == FRAME_POINTER_REGNUM
+ || ( to == STACK_POINTER_REGNUM && ! frame_pointer_needed);
+}
+
+
+static void
+rx_trampoline_template (FILE * file)
+{
+ /* Output assembler code for a block containing the constant
+ part of a trampoline, leaving space for the variable parts.
+
+ On the RX, (where r8 is the static chain regnum) the trampoline
+ looks like:
+
+ mov #<static chain value>, r8
+ mov #<function's address>, r9
+ jmp r9
+
+ In big-endian-data-mode however instructions are read into the CPU
+ 4 bytes at a time. These bytes are then swapped around before being
+ passed to the decoder. So...we must partition our trampoline into
+ 4 byte packets and swap these packets around so that the instruction
+ reader will reverse the process. But, in order to avoid splitting
+ the 32-bit constants across these packet boundaries, (making inserting
+ them into the constructed trampoline very difficult) we have to pad the
+     instruction sequence with NOP insns, i.e.:
+
+ nop
+ nop
+ mov.l #<...>, r8
+ nop
+ nop
+ mov.l #<...>, r9
+ jmp r9
+ nop
+ nop */
+
+ if (! TARGET_BIG_ENDIAN_DATA)
+ {
+ asm_fprintf (file, "\tmov.L\t#0deadbeefH, r%d\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (file, "\tmov.L\t#0deadbeefH, r%d\n", TRAMPOLINE_TEMP_REGNUM);
+ asm_fprintf (file, "\tjmp\tr%d\n", TRAMPOLINE_TEMP_REGNUM);
+ }
+ else
+ {
+ char r8 = '0' + STATIC_CHAIN_REGNUM;
+ char r9 = '0' + TRAMPOLINE_TEMP_REGNUM;
+
+ if (TARGET_AS100_SYNTAX)
+ {
+ asm_fprintf (file, "\t.BYTE 0%c2H, 0fbH, 003H, 003H\n", r8);
+ asm_fprintf (file, "\t.BYTE 0deH, 0adH, 0beH, 0efH\n");
+ asm_fprintf (file, "\t.BYTE 0%c2H, 0fbH, 003H, 003H\n", r9);
+ asm_fprintf (file, "\t.BYTE 0deH, 0adH, 0beH, 0efH\n");
+ asm_fprintf (file, "\t.BYTE 003H, 003H, 00%cH, 07fH\n", r9);
+ }
+ else
+ {
+ asm_fprintf (file, "\t.byte 0x%c2, 0xfb, 0x03, 0x03\n", r8);
+ asm_fprintf (file, "\t.byte 0xde, 0xad, 0xbe, 0xef\n");
+ asm_fprintf (file, "\t.byte 0x%c2, 0xfb, 0x03, 0x03\n", r9);
+ asm_fprintf (file, "\t.byte 0xde, 0xad, 0xbe, 0xef\n");
+ asm_fprintf (file, "\t.byte 0x03, 0x03, 0x0%c, 0x7f\n", r9);
+ }
+ }
+}
+
+static void
+rx_trampoline_init (rtx tramp, tree fndecl, rtx chain)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ emit_block_move (tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
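+  /* In the little-endian template each mov.L is a 2 byte opcode
+     followed by its 32-bit immediate, placing the place-holders at
+     offsets 2 and 8.  The big-endian nop padding moves them onto the
+     4 byte packet boundaries at offsets 4 and 12.  */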
+ if (TARGET_BIG_ENDIAN_DATA)
+ {
+ emit_move_insn (adjust_address (tramp, SImode, 4), chain);
+ emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
+ }
+ else
+ {
+ emit_move_insn (adjust_address (tramp, SImode, 2), chain);
+ emit_move_insn (adjust_address (tramp, SImode, 6 + 2), fnaddr);
+ }
+}
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE rx_function_value
+
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB rx_return_in_msb
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P rx_in_small_data
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY rx_return_in_memory
+
+#undef TARGET_HAVE_SRODATA_SECTION
+#define TARGET_HAVE_SRODATA_SECTION true
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION rx_select_rtx_section
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION rx_select_section
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS rx_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN rx_expand_builtin
+
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR rx_elf_asm_constructor
+
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR rx_elf_asm_destructor
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX rx_struct_value_rtx
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE rx_attribute_table
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START rx_file_start
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P rx_is_ms_bitfield_layout
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P rx_is_legitimate_address
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS rx_allocate_stack_slots_for_args
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE rx_output_function_prologue
+
+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P rx_func_attr_inlinable
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION rx_set_current_function
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION rx_handle_option
+
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER rx_assemble_integer
+
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
+
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 32
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST rx_address_cost
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE rx_can_eliminate
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE rx_trampoline_template
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT rx_trampoline_init
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* #include "gt-rx.h" */
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
new file mode 100644
index 00000000000..bb7cf7f1e3e
--- /dev/null
+++ b/gcc/config/rx/rx.h
@@ -0,0 +1,659 @@
+/* GCC backend definitions for the Renesas RX processor.
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__RX__"); \
+ builtin_assert ("cpu=RX"); \
+ if (rx_cpu_type == RX610) \
+ builtin_assert ("machine=RX610"); \
+ else \
+ builtin_assert ("machine=RX600"); \
+ \
+ if (TARGET_BIG_ENDIAN_DATA) \
+ builtin_define ("__RX_BIG_ENDIAN__"); \
+ else \
+ builtin_define ("__RX_LITTLE_ENDIAN__");\
+ \
+ if (TARGET_32BIT_DOUBLES) \
+ builtin_define ("__RX_32BIT_DOUBLES__");\
+ else \
+ builtin_define ("__RX_64BIT_DOUBLES__");\
+ \
+ if (ALLOW_RX_FPU_INSNS) \
+ builtin_define ("__RX_FPU_INSNS__"); \
+ \
+ if (TARGET_AS100_SYNTAX) \
+ builtin_define ("__RX_AS100_SYNTAX__"); \
+ else \
+ builtin_define ("__RX_GAS_SYNTAX__"); \
+ } \
+ while (0)
+
+enum rx_cpu_types
+{
+ RX600,
+ RX610
+};
+
+extern enum rx_cpu_types rx_cpu_type;
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{mbig-endian-data:-mbig-endian-data} \
+%{m32bit-doubles:-m32bit-doubles} \
+%{!m32bit-doubles:-m64bit-doubles} \
+%{msmall-data-limit*:-msmall-data-limit} \
+%{mrelax:-relax} \
+"
+
+#undef LIB_SPEC
+#define LIB_SPEC " \
+--start-group \
+-lc \
+%{msim*:-lsim}%{!msim*:-lnosys} \
+%{fprofile-arcs|fprofile-generate|coverage:-lgcov} \
+--end-group \
+%{!T*: %{msim*:%Trx-sim.ld}%{!msim*:%Trx.ld}} \
+"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian-data:--oformat elf32-rx-be} %{mrelax:-relax}"
+
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN TARGET_BIG_ENDIAN_DATA
+#define WORDS_BIG_ENDIAN TARGET_BIG_ENDIAN_DATA
+
+#ifdef __RX_BIG_ENDIAN__
+#define LIBGCC2_WORDS_BIG_ENDIAN 1
+#else
+#define LIBGCC2_WORDS_BIG_ENDIAN 0
+#endif
+
+#define UNITS_PER_WORD 4
+
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE (TARGET_32BIT_DOUBLES ? 32 : 64)
+#define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE
+
+#ifdef __RX_32BIT_DOUBLES__
+#define LIBGCC2_HAS_DF_MODE 0
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32
+#define LIBGCC2_DOUBLE_TYPE_SIZE 32
+#else
+#define LIBGCC2_HAS_DF_MODE 1
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#define LIBGCC2_DOUBLE_TYPE_SIZE 64
+#endif
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#define STRICT_ALIGNMENT 1
+#define FUNCTION_BOUNDARY 8
+#define BIGGEST_ALIGNMENT 32
+#define STACK_BOUNDARY 32
+#define PARM_BOUNDARY 8
+
+#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) 32
+
+#define STACK_GROWS_DOWNWARD 1
+#define FRAME_GROWS_DOWNWARD 0
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define Pmode SImode
+#define POINTER_SIZE 32
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+#define POINTERS_EXTEND_UNSIGNED 1
+#define FUNCTION_MODE QImode
+#define CASE_VECTOR_MODE Pmode
+#define WORD_REGISTER_OPERATIONS 1
+#define HAS_LONG_COND_BRANCH 0
+#define HAS_LONG_UNCOND_BRANCH 0
+
+#define MOVE_MAX 4
+#define STARTING_FRAME_OFFSET 0
+
+#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) 0
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define LEGITIMATE_CONSTANT_P(X) rx_is_legitimate_constant (X)
+
+#define HANDLE_PRAGMA_PACK_PUSH_POP 1
+
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+
+#define MOVE_RATIO(SPEED) ((SPEED) ? 4 : 2)
+#define SLOW_BYTE_ACCESS 1
+
+#define STORE_FLAG_VALUE 1
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+#define SHORT_IMMEDIATES_SIGN_EXTEND 1
+
+enum reg_class
+{
+ NO_REGS, /* No registers in set. */
+ GR_REGS, /* Integer registers. */
+ ALL_REGS, /* All registers. */
+ LIM_REG_CLASSES /* Max value + 1. */
+};
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "GR_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000 }, /* No registers, */ \
+ { 0x0000ffff }, /* Integer registers. */ \
+ { 0x0000ffff } /* All registers. */ \
+}
+
+#define IRA_COVER_CLASSES \
+ { \
+ GR_REGS, LIM_REG_CLASSES \
+ }
+
+#define SMALL_REGISTER_CLASSES 0
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+#define CLASS_MAX_NREGS(CLASS, MODE) ((GET_MODE_SIZE (MODE) \
+ + UNITS_PER_WORD - 1) \
+ / UNITS_PER_WORD)
+
+#define GENERAL_REGS GR_REGS
+#define BASE_REG_CLASS GR_REGS
+#define INDEX_REG_CLASS GR_REGS
+
+#define FIRST_PSEUDO_REGISTER 16
+
+#define REGNO_REG_CLASS(REGNO) ((REGNO) < FIRST_PSEUDO_REGISTER \
+ ? GR_REGS : NO_REGS)
+
+#define STACK_POINTER_REGNUM 0
+#define FUNC_RETURN_REGNUM 1
+#define FRAME_POINTER_REGNUM 6
+#define ARG_POINTER_REGNUM 7
+#define STATIC_CHAIN_REGNUM 8
+#define TRAMPOLINE_TEMP_REGNUM 9
+#define STRUCT_VAL_REGNUM 15
+
+/* This is the register which is used to hold the address of the start
+ of the small data area, if that feature is being used. Note - this
+ register must not be call_used because otherwise library functions
+ that are compiled without small data support might clobber it.
+
+ FIXME: The function gcc/config/rx/rx.c:rx_gen_move_template() has a
+ built in copy of this register's name, rather than constructing the
+ name from this #define. */
+#define GP_BASE_REGNUM 13
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = rx_initial_elimination_offset ((FROM), (TO))
+
+
+#define FUNCTION_ARG_REGNO_P(N) (((N) >= 1) && ((N) <= 4))
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == FUNC_RETURN_REGNUM)
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define FIXED_REGISTERS \
+{ \
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \
+}
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1 \
+}
+
+#define CONDITIONAL_REGISTER_USAGE \
+ rx_conditional_register_usage ()
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (((GET_MODE_CLASS (MODE) != MODE_INT \
+ || GET_MODE_SIZE (MODE) >= 4) \
+ ? (MODE) \
+ : SImode), \
+ FUNC_RETURN_REGNUM)
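+/* I.e. integer values narrower than a word are returned from library
+   calls in SImode, always in FUNC_RETURN_REGNUM (r1).  */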
+
+/* Order of allocation of registers. */
+
+#define REG_ALLOC_ORDER \
+{ 7, 10, 11, 12, 13, 14, 4, 3, 2, 1, 9, 8, 6, 5, 15 \
+}
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) CLASS
+
+#define REGNO_IN_RANGE(REGNO, MIN, MAX) \
+ (IN_RANGE ((REGNO), (MIN), (MAX)) \
+ || (reg_renumber != NULL \
+ && reg_renumber[(REGNO)] >= (MIN) \
+ && reg_renumber[(REGNO)] <= (MAX)))
+
+#ifdef REG_OK_STRICT
+#define REGNO_OK_FOR_BASE_P(regno) REGNO_IN_RANGE (regno, 0, 15)
+#else
+#define REGNO_OK_FOR_BASE_P(regno) 1
+#endif
+
+#define REGNO_OK_FOR_INDEX_P(regno) REGNO_OK_FOR_BASE_P (regno)
+
+#define RTX_OK_FOR_BASE(X, STRICT) \
+ ((STRICT) ? \
+ ( (REG_P (X) \
+ && REGNO_IN_RANGE (REGNO (X), 0, 15)) \
+ || (GET_CODE (X) == SUBREG \
+ && REG_P (SUBREG_REG (X)) \
+ && REGNO_IN_RANGE (REGNO (SUBREG_REG (X)), 0, 15))) \
+ : \
+ ( (REG_P (X) \
+ || (GET_CODE (X) == SUBREG \
+ && REG_P (SUBREG_REG (X))))))
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \
+ do \
+ { \
+ if (rx_is_mode_dependent_addr (ADDR)) \
+ goto LABEL; \
+ } \
+ while (0)
+
+
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, arg_pointer_rtx, GEN_INT (-4))) \
+ : NULL_RTX)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_MEM (Pmode, stack_pointer_rtx)
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+typedef unsigned int CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM) = 0
+
+#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
+ rx_function_arg (& CUM, MODE, TYPE, NAMED)
+
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ (CUM) += rx_function_arg_size (MODE, TYPE)
+
+#define TRAMPOLINE_SIZE (! TARGET_BIG_ENDIAN_DATA ? 14 : 20)
+#define TRAMPOLINE_ALIGNMENT 32
+
+#define NO_PROFILE_COUNTERS 1
+#define PROFILE_BEFORE_PROLOGUE 1
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fprintf (FILE, "\tbsr\t__mcount\n");
+
+
+#define HARD_REGNO_NREGS(REGNO, MODE) CLASS_MAX_NREGS (0, MODE)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ REGNO_REG_CLASS (REGNO) == GR_REGS
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ( ( GET_MODE_CLASS (MODE1) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == ( GET_MODE_CLASS (MODE2) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+
+#define REGISTER_NAMES \
+ { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" \
+ };
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "sp", STACK_POINTER_REGNUM } \
+ , { "fp", FRAME_POINTER_REGNUM } \
+ , { "arg", ARG_POINTER_REGNUM } \
+ , { "chain", STATIC_CHAIN_REGNUM } \
+}
+
+#define DATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION D,DATA" \
+ : "\t.section D,\"aw\",@progbits\n\t.p2align 2")
+
+#define SDATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION D_2,DATA,ALIGN=2" \
+ : "\t.section D_2,\"aw\",@progbits\n\t.p2align 1")
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION C,ROMDATA,ALIGN=4" \
+ : "\t.section C,\"a\",@progbits\n\t.p2align 2")
+
+#define BSS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION B,DATA,ALIGN=4" \
+ : "\t.section B,\"w\",@nobits\n\t.p2align 2")
+
+#define SBSS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION B_2,DATA,ALIGN=2" \
+ : "\t.section B_2,\"w\",@nobits\n\t.p2align 1")
+
+/* The following definitions are conditional upon whether the compiler
+   itself is being built, or whether crtstuff.c is being compiled by the
+   newly built compiler.  */
+#if defined CRT_BEGIN || defined CRT_END
+# ifdef __RX_AS100_SYNTAX
+# define TEXT_SECTION_ASM_OP "\t.SECTION P,CODE"
+# define CTORS_SECTION_ASM_OP "\t.SECTION init_array,CODE"
+# define DTORS_SECTION_ASM_OP "\t.SECTION fini_array,CODE"
+# define INIT_ARRAY_SECTION_ASM_OP "\t.SECTION init_array,CODE"
+# define FINI_ARRAY_SECTION_ASM_OP "\t.SECTION fini_array,CODE"
+# else
+# define TEXT_SECTION_ASM_OP "\t.section P,\"ax\""
+# define CTORS_SECTION_ASM_OP \
+ "\t.section\t.init_array,\"aw\",@init_array"
+# define DTORS_SECTION_ASM_OP \
+ "\t.section\t.fini_array,\"aw\",@fini_array"
+# define INIT_ARRAY_SECTION_ASM_OP \
+ "\t.section\t.init_array,\"aw\",@init_array"
+# define FINI_ARRAY_SECTION_ASM_OP \
+ "\t.section\t.fini_array,\"aw\",@fini_array"
+# endif
+#else
+# define TEXT_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION P,CODE" : "\t.section P,\"ax\"")
+
+# define CTORS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION init_array,CODE" \
+ : "\t.section\t.init_array,\"aw\",@init_array")
+
+# define DTORS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION fini_array,CODE" \
+ : "\t.section\t.fini_array,\"aw\",@fini_array")
+
+# define INIT_ARRAY_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION init_array,CODE" \
+ : "\t.section\t.init_array,\"aw\",@init_array")
+
+# define FINI_ARRAY_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION fini_array,CODE" \
+ : "\t.section\t.fini_array,\"aw\",@fini_array")
+#endif
+
+#define GLOBAL_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.GLB\t" : "\t.global\t")
+#define ASM_COMMENT_START " ;"
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+#define LOCAL_LABEL_PREFIX "L"
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) \
+ do \
+ { \
+ if ((LOG) == 0) \
+ break; \
+ if (TARGET_AS100_SYNTAX) \
+ { \
+ if ((LOG) >= 2) \
+ fprintf (STREAM, "\t.ALIGN 4\t; %d alignment actually requested\n", 1 << (LOG)); \
+ else \
+ fprintf (STREAM, "\t.ALIGN 2\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\t.balign %d\n", 1 << (LOG)); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, TARGET_AS100_SYNTAX ? "\t.LWORD L%d\n" : "\t.long .L%d\n", \
+ VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+   Note: The local label referenced by the "1b" below is emitted by
+   the tablejump insn.  */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, TARGET_AS100_SYNTAX \
+ ? "\t.LWORD L%d - ?-\n" : "\t.long .L%d - 1b\n", VALUE)
+
+#define ASM_OUTPUT_SIZE_DIRECTIVE(STREAM, NAME, SIZE) \
+ do \
+ { \
+ HOST_WIDE_INT size_ = (SIZE); \
+ \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+ .size pseudo-op. */ \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ \
+ fputs (SIZE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fprintf (STREAM, ", " HOST_WIDE_INT_PRINT_DEC "\n", size_); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_MEASURED_SIZE(STREAM, NAME) \
+ do \
+ { \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+ .size pseudo-op. */ \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ fputs (SIZE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fputs (", .-", STREAM); \
+ assemble_name (STREAM, NAME); \
+ putc ('\n', STREAM); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE) \
+ do \
+ { \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+	 .type pseudo-op.  */					      \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ fputs (TYPE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fputs (", ", STREAM); \
+ fprintf (STREAM, TYPE_OPERAND_FMT, TYPE); \
+ putc ('\n', STREAM); \
+ } \
+ while (0)
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ do \
+ { \
+ sprintf (LABEL, TARGET_AS100_SYNTAX ? "*%s%u" : "*.%s%u", \
+ PREFIX, (unsigned) (NUM)); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ do \
+ { \
+ if (TARGET_AS100_SYNTAX) \
+ targetm.asm_out.globalize_label (FILE, NAME); \
+ default_elf_asm_output_external (FILE, DECL, NAME); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_AS100_SYNTAX) \
+ { \
+ fprintf ((FILE), "\t.GLB\t"); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), "\n"); \
+ assemble_name ((FILE), (NAME)); \
+ switch ((ALIGN) / BITS_PER_UNIT) \
+ { \
+ case 4: \
+ fprintf ((FILE), ":\t.BLKL\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE) / 4); \
+ break; \
+ case 2: \
+ fprintf ((FILE), ":\t.BLKW\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE) / 2); \
+ break; \
+ default: \
+ fprintf ((FILE), ":\t.BLKB\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE)); \
+ break; \
+ } \
+ } \
+ else \
+ { \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ } \
+ while (0)
+
+#undef SKIP_ASM_OP
+#define SKIP_ASM_OP (TARGET_AS100_SYNTAX ? "\t.BLKB\t" : "\t.zero\t")
+
+#undef ASM_OUTPUT_LIMITED_STRING
+#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \
+ do \
+ { \
+ const unsigned char *_limited_str = \
+ (const unsigned char *) (STR); \
+ unsigned ch; \
+ \
+ fprintf ((FILE), TARGET_AS100_SYNTAX \
+ ? "\t.BYTE\t\"" : "\t.string\t\""); \
+ \
+ for (; (ch = *_limited_str); _limited_str++) \
+ { \
+ int escape; \
+ \
+ switch (escape = ESCAPES[ch]) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ break; \
+ } \
+ } \
+ \
+ fprintf ((FILE), TARGET_AS100_SYNTAX ? "\"\n\t.BYTE\t0\n" : "\"\n");\
+ } \
+ while (0)
+
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP (TARGET_AS100_SYNTAX \
+ ? "\t.END\t; Built by: ": "\t.ident\t")
+
+/* For PIC put jump tables into the text section so that the offsets that
+ they contain are always computed between two same-section symbols. */
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ rx_print_operand (FILE, X, CODE)
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ rx_print_operand_address (FILE, ADDR)
+
+#define CC_NO_CARRY 0400
+#define NOTICE_UPDATE_CC(EXP, INSN) rx_notice_update_cc (EXP, INSN)
+
+extern int rx_float_compare_mode;
+
+/* This is a version of REG_P that also returns TRUE for SUBREGs. */
+#define RX_REG_P(rtl) (REG_P (rtl) || GET_CODE (rtl) == SUBREG)
+
+/* Analogous to REG_P, but true for SET expressions instead.  */
+#define SET_P(rtl) (GET_CODE (rtl) == SET)
+
+#define CAN_DEBUG_WITHOUT_FP 1
+
+/* The AS100 assembler does not support .leb128 and .uleb128, but
+ the compiler-build-time configure tests will have enabled their
+ use because GAS supports them. So default to generating STABS
+ debug information instead of DWARF2 when generating AS100
+ compatible output. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \
+ ? DBX_DEBUG : DWARF2_DEBUG)
+
+#define INCOMING_FRAME_SP_OFFSET 4
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 4
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4
+
+extern int rx_enable_fpu;
+
+/* For some unknown reason LTO compression is not working, at
+ least on my local system. So set the default compression
+ level to none, for now.
+
+ For an explanation of rx_flag_no_fpu see rx_handle_option(). */
+#define OVERRIDE_OPTIONS \
+ do \
+ { \
+ if (flag_lto_compression_level == -1) \
+ flag_lto_compression_level = 0; \
+ \
+ if (rx_enable_fpu == 1) \
+ set_fast_math_flags (true); \
+ } \
+ while (0)
+
+/* This macro is used to decide when RX FPU instructions can be used. */
+#define ALLOW_RX_FPU_INSNS ((rx_enable_fpu != -1) \
+ && flag_unsafe_math_optimizations)
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
new file mode 100644
index 00000000000..360f6235558
--- /dev/null
+++ b/gcc/config/rx/rx.md
@@ -0,0 +1,1766 @@
+;; Machine Description for Renesas RX processors
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This code iterator allows all branch instructions to
+;; be generated from a single define_expand template.
+(define_code_iterator most_cond [eq ne gt ge lt le gtu geu ltu leu
+ unordered ordered ])
+
+;; This mode iterator is used for sign- and zero-extensions.
+(define_mode_iterator small_int_modes [(HI "") (QI "")])
+
+;; We do not handle DFmode here because it is either
+;; the same as SFmode or, if -m64bit-doubles is active,
+;; all operations on doubles have to be handled by
+;; library functions.
+(define_mode_iterator register_modes
+ [(SF "ALLOW_RX_FPU_INSNS") (SI "") (HI "") (QI "")])
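+
+;; As an illustration (not itself a pattern in this file): because of
+;; this iterator the mov<register_modes:mode> expander below yields
+;; movqi, movhi, movsi and movsf, with the movsf variant additionally
+;; conditional upon ALLOW_RX_FPU_INSNS.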
+
+
+;; Used to map GCC condition names to RX
+;; condition names for builtin instructions.
+(define_code_iterator gcc_conds [eq ne gt ge lt le gtu geu ltu leu
+ unge unlt uneq ltgt])
+(define_code_attr rx_conds [(eq "eq") (ne "ne") (gt "gt") (ge "ge") (lt "lt")
+ (le "le") (gtu "gtu") (geu "geu") (ltu "ltu")
+ (leu "leu") (unge "pz") (unlt "n") (uneq "o")
+ (ltgt "no")])
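+
+;; For example (a hedged sketch, not one of the patterns in this file),
+;; a template along the lines of:
+;;
+;;   (define_insn "sc_<code>"
+;;     [(set (match_operand:SI 0 "register_operand" "=r")
+;;           (gcc_conds:SI ...))]
+;;     ""
+;;     "sc<rx_conds>.L\t%0"
+;;   )
+;;
+;; would expand once per condition code, pairing e.g. GCC's "unge"
+;; with the RX condition suffix "pz" in the assembler template.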
+
+(define_constants
+ [
+ (SP_REG 0)
+
+ (UNSPEC_LOW_REG 0)
+ (UNSPEC_HIGH_REG 1)
+
+ (UNSPEC_RTE 10)
+ (UNSPEC_RTFI 11)
+ (UNSPEC_NAKED 12)
+
+ (UNSPEC_MOVSTR 20)
+ (UNSPEC_MOVMEM 21)
+ (UNSPEC_SETMEM 22)
+ (UNSPEC_STRLEN 23)
+ (UNSPEC_CMPSTRN 24)
+
+ (UNSPEC_BUILTIN_BRK 30)
+ (UNSPEC_BUILTIN_CLRPSW 31)
+ (UNSPEC_BUILTIN_INT 32)
+ (UNSPEC_BUILTIN_MACHI 33)
+ (UNSPEC_BUILTIN_MACLO 34)
+ (UNSPEC_BUILTIN_MULHI 35)
+ (UNSPEC_BUILTIN_MULLO 36)
+ (UNSPEC_BUILTIN_MVFACHI 37)
+ (UNSPEC_BUILTIN_MVFACMI 38)
+ (UNSPEC_BUILTIN_MVFC 39)
+ (UNSPEC_BUILTIN_MVFCP 40)
+ (UNSPEC_BUILTIN_MVTACHI 41)
+ (UNSPEC_BUILTIN_MVTACLO 42)
+ (UNSPEC_BUILTIN_MVTC 43)
+ (UNSPEC_BUILTIN_MVTIPL 44)
+ (UNSPEC_BUILTIN_RACW 45)
+ (UNSPEC_BUILTIN_REVW 46)
+ (UNSPEC_BUILTIN_RMPA 47)
+ (UNSPEC_BUILTIN_ROUND 48)
+ (UNSPEC_BUILTIN_SAT 49)
+ (UNSPEC_BUILTIN_SETPSW 50)
+ (UNSPEC_BUILTIN_WAIT 51)
+ ]
+)
+
+;; Condition code settings:
+;; none - insn does not affect the condition code bits
+;; set_zs - insn sets z,s to usable values;
+;; set_zso - insn sets z,s,o to usable values;
+;; set_zsoc - insn sets z,s,o,c to usable values;
+;; clobber - value of cc0 is unknown
+(define_attr "cc" "none,set_zs,set_zso,set_zsoc,clobber" (const_string "none"))
+
+(define_attr "length" "" (const_int 8))
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Pipeline description.
+
+;; The RX only has a single pipeline. It has five stages (fetch,
+;; decode, execute, memory access, writeback) each of which normally
+;; takes a single CPU clock cycle.
+
+;; The timings attribute consists of two numbers, the first is the
+;; throughput, which is the number of cycles the instruction takes
+;; to execute and generate a result. The second is the latency
+;; which is the effective number of cycles the instruction takes to
+;; execute if its result is used by the following instruction. The
+;; latency is always greater than or equal to the throughput.
+;; These values were taken from tables 2.13 and 2.14 in section 2.8
+;; of the RX610 Group Hardware Manual v0.11.
+
+;; Note - it would be nice to use strings rather than integers for
+;; the possible values of this attribute, so that we can have the
+;; gcc build mechanism check for values that are not supported by
+;; the reservations below. But this will not work because the code
+;; in rx_adjust_sched_cost() needs integers, not strings.
+
+(define_attr "timings" "" (const_int 11))
+
+(define_automaton "pipelining")
+(define_cpu_unit "throughput" "pipelining")
+
+(define_insn_reservation "throughput__1_latency__1" 1
+ (eq_attr "timings" "11") "throughput")
+(define_insn_reservation "throughput__1_latency__2" 2
+ (eq_attr "timings" "12") "throughput,nothing")
+(define_insn_reservation "throughput__2_latency__2" 1
+ (eq_attr "timings" "22") "throughput*2")
+(define_insn_reservation "throughput__3_latency__3" 1
+ (eq_attr "timings" "33") "throughput*3")
+(define_insn_reservation "throughput__3_latency__4" 2
+ (eq_attr "timings" "34") "throughput*3,nothing")
+(define_insn_reservation "throughput__4_latency__4" 1
+ (eq_attr "timings" "44") "throughput*4")
+(define_insn_reservation "throughput__4_latency__5" 2
+ (eq_attr "timings" "45") "throughput*4,nothing")
+(define_insn_reservation "throughput__5_latency__5" 1
+ (eq_attr "timings" "55") "throughput*5")
+(define_insn_reservation "throughput__5_latency__6" 2
+ (eq_attr "timings" "56") "throughput*5,nothing")
+(define_insn_reservation "throughput__6_latency__6" 1
+ (eq_attr "timings" "66") "throughput*6")
+(define_insn_reservation "throughput_10_latency_10" 1
+ (eq_attr "timings" "1010") "throughput*10")
+(define_insn_reservation "throughput_11_latency_11" 1
+ (eq_attr "timings" "1111") "throughput*11")
+(define_insn_reservation "throughput_16_latency_16" 1
+ (eq_attr "timings" "1616") "throughput*16")
+(define_insn_reservation "throughput_18_latency_18" 1
+ (eq_attr "timings" "1818") "throughput*18")
+
+;; Comparisons
+
+(define_expand "cbranchsi4"
+ [(set (cc0) (compare:CC (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "rx_source_operand")))
+ (set (pc)
+ (if_then_else (match_operator:SI 0 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ ""
+)
+
+(define_expand "cbranchsf4"
+ [(set (cc0) (compare:CC (match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "rx_source_operand")))
+ (set (pc)
+ (if_then_else (match_operator:SI 0 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "ALLOW_RX_FPU_INSNS && ! flag_non_call_exceptions"
+ ""
+)
+
+;; The TST instruction is not used as it does not set the Carry flag,
+;; so for example, the LessThan comparison cannot be tested.
+;;
+;; (define_insn "tstsi"
+;; [(set (cc0)
+;; (match_operand:SI 0 "rx_source_operand" "r,i,Q")))]
+;; ""
+;; {
+;; rx_float_compare_mode = false;
+;; return "tst\t%Q0";
+;; }
+;; [(set_attr "cc" "set_zs")
+;; (set_attr "timings" "11,11,33")
+;; (set_attr "length" "3,7,6")]
+;; )
+
+(define_insn "cmpsi"
+ [(set (cc0) (compare:CC
+ (match_operand:SI 0 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:SI 1 "rx_source_operand"
+ "r,Uint04,Int08,Sint16,Sint24,i,Q")))]
+ ""
+ {
+ rx_float_compare_mode = false;
+ return "cmp\t%Q1, %Q0";
+ }
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,5")]
+)
+
+;; This pattern is disabled when -fnon-call-exceptions is active because
+;; it could generate a floating point exception, which would introduce an
+;; edge into the flow graph between this insn and the conditional branch
+;; insn to follow, thus breaking the cc0 relationship. Run the g++ test
+;; g++.dg/eh/080514-1.C to see this happen.
+(define_insn "cmpsf"
+ [(set (cc0)
+ (compare:CC (match_operand:SF 0 "register_operand" "r,r,r")
+ (match_operand:SF 1 "rx_source_operand" "r,i,Q")))]
+ "ALLOW_RX_FPU_INSNS && ! flag_non_call_exceptions"
+ {
+ rx_float_compare_mode = true;
+ return "fcmp\t%1, %0";
+ }
+ [(set_attr "cc" "set_zso")
+ (set_attr "timings" "11,11,33")
+ (set_attr "length" "3,7,5")]
+)
+
+;; Flow Control Instructions:
+
+(define_expand "b<code>"
+ [(set (pc)
+ (if_then_else (most_cond (cc0) (const_int 0))
+ (label_ref (match_operand 0))
+ (pc)))]
+ ""
+ ""
+)
+
+(define_insn "*conditional_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ {
+ return rx_gen_cond_branch_template (operands[1], false);
+ }
+ [(set_attr "length" "8") ;; This length is wrong, but it is
+ ;; too hard to compute statically.
+ (set_attr "timings" "33") ;; The timing assumes that the branch is taken.
+ (set_attr "cc" "clobber")] ;; FIXME: This clobber is wrong.
+)
+
+(define_insn "*reveresed_conditional_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ {
+ return rx_gen_cond_branch_template (operands[1], true);
+ }
+ [(set_attr "length" "8") ;; This length is wrong, but it is
+ ;; too hard to compute statically.
+ (set_attr "timings" "33") ;; The timing assumes that the branch is taken.
+ (set_attr "cc" "clobber")] ;; FIXME: This clobber is wrong.
+)
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "bra\t%0"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")
+ (set_attr "cc" "clobber")] ;; FIXME: This clobber is wrong.
+)
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jmp\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "33")
+ (set_attr "cc" "clobber")] ;; FIXME: This clobber is wrong.
+)
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ { return flag_pic ? (TARGET_AS100_SYNTAX ? "\n?:\tbra\t%0"
+ : "\n1:\tbra\t%0")
+ : "jmp\t%0";
+ }
+ [(set_attr "cc" "clobber") ;; FIXME: This clobber is wrong.
+ (set_attr "timings" "33")
+ (set_attr "length" "2")]
+)
+
+(define_insn "simple_return"
+ [(return)]
+ ""
+ "rts"
+ [(set_attr "length" "1")
+ (set_attr "timings" "55")]
+)
+
+(define_insn "deallocate_and_return"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "immediate_operand" "i")))
+ (return)]
+ ""
+ "rtsd\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "55")]
+)
+
+(define_insn "pop_and_return"
+ [(match_parallel 1 "rx_rtsd_vector"
+ [(set:SI (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI
+ 0 "const_int_operand" "n")))])]
+ "reload_completed"
+ {
+ rx_emit_stack_popm (operands, false);
+ return "";
+ }
+ [(set_attr "length" "3")
+ (set_attr "timings" "56")]
+)
+
+(define_insn "fast_interrupt_return"
+ [(unspec_volatile [(return)] UNSPEC_RTFI) ]
+ ""
+ "rtfi"
+ [(set_attr "length" "2")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "exception_return"
+ [(unspec_volatile [(return)] UNSPEC_RTE) ]
+ ""
+ "rte"
+ [(set_attr "length" "2")
+ (set_attr "timings" "66")]
+)
+
+(define_insn "naked_return"
+ [(unspec_volatile [(return)] UNSPEC_NAKED) ]
+ ""
+ "; Naked function: epilogue provided by programmer."
+)
+
+
+;; Note - the following set of patterns does not use the "memory_operand"
+;; predicate or an "m" constraint because we do not allow symbol_refs
+;; or label_refs as legitimate memory addresses.  This matches the
+;; behaviour of most of the RX instructions.  Only the call/branch
+;; instructions are allowed to refer to symbols/labels directly.
+;; The call operands are in QImode because that is the value of
+;; FUNCTION_MODE.
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "general_operand")
+ (match_operand:SI 1 "general_operand"))]
+ ""
+ {
+ rtx dest = XEXP (operands[0], 0);
+
+ if (! rx_call_operand (dest, Pmode))
+ dest = force_reg (Pmode, dest);
+ emit_call_insn (gen_call_internal (dest, operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "call_internal"
+ [(call (mem:QI (match_operand:SI 0 "rx_call_operand" "r,Symbol"))
+ (match_operand:SI 1 "general_operand" "g,g"))]
+ ""
+ "@
+ jsr\t%A0
+ bsr\t%A0"
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "33")]
+)
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand")
+ (call (match_operand:QI 1 "general_operand")
+ (match_operand:SI 2 "general_operand")))]
+ ""
+ {
+ rtx dest = XEXP (operands[1], 0);
+
+ if (! rx_call_operand (dest, Pmode))
+ dest = force_reg (Pmode, dest);
+ emit_call_insn (gen_call_value_internal (operands[0], dest, operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=r,r")
+ (call (mem:QI (match_operand:SI 1 "rx_call_operand" "r,Symbol"))
+ (match_operand:SI 2 "general_operand" "g,g")))]
+ ""
+ "@
+ jsr\t%A1
+ bsr\t%A1"
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "sibcall"
+ [(call (mem:QI (match_operand:SI 0 "rx_symbolic_call_operand" "Symbol"))
+ (match_operand:SI 1 "general_operand" "g"))
+ (return)
+ (use (match_operand 2 "" ""))]
+ ""
+ "bra\t%A0"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "sibcall_value"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:QI (match_operand:SI 1 "rx_symbolic_call_operand" "Symbol"))
+ (match_operand:SI 2 "general_operand" "g")))
+ (return)
+ (use (match_operand 3 "" ""))]
+ ""
+ "bra\t%A1"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")]
+)
+
+;; Function Prologue/Epilogue Instructions
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "rx_expand_prologue (); DONE;"
+)
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "rx_expand_epilogue (false); DONE;"
+)
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+ "rx_expand_epilogue (true); DONE;"
+)
+
+;; Move Instructions
+
+;; Note - we do not allow memory to memory moves, even though the ISA
+;; supports them. The reason is that the conditions on such moves are
+;; too restrictive, specifically the source addressing mode is limited
+;; by the destination addressing mode and vice versa. (For example it
+;; is not possible to use indexed register indirect addressing for one
+;; of the operands if the other operand is anything other than a register,
+;; but it is possible to use register relative addressing when the other
+;; operand also uses register relative or register indirect addressing).
+;;
+;; GCC does not support computing legitimate addresses based on the
+;; nature of other operands involved in the instruction, and reload is
+;; not smart enough to cope with a whole variety of different memory
+;; addressing constraints, so it is simpler and safer to just refuse
+;; to support memory to memory moves.
+
+(define_expand "mov<register_modes:mode>"
+ [(set (match_operand:register_modes 0 "general_operand")
+ (match_operand:register_modes 1 "general_operand"))]
+ ""
+ {
+ if (MEM_P (operand0) && MEM_P (operand1))
+ operands[1] = copy_to_mode_reg (<register_modes:MODE>mode, operand1);
+ }
+)
+
+(define_insn "*mov<register_modes:mode>_internal"
+ [(set (match_operand:register_modes
+ 0 "nonimmediate_operand" "=r,r,r,r,r,r,m,Q,Q,Q,Q")
+ (match_operand:register_modes
+ 1 "general_operand" "Int08,Sint16,Sint24,i,r,m,r,Int08,Sint16,Sint24,i"))]
+ ""
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "3,4,5,6,2,4,6,5,6,7,8")
+ (set_attr "timings" "11,11,11,11,11,12,11,11,11,11,11")]
+)
+
+(define_insn "extend<small_int_modes:mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:small_int_modes
+ 1 "nonimmediate_operand" "r,m")))]
+ ""
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "2,6")
+ (set_attr "timings" "11,12")]
+)
+
+(define_insn "zero_extend<small_int_modes:mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:small_int_modes
+ 1 "nonimmediate_operand" "r,m")))]
+ ""
+ { return rx_gen_move_template (operands, true); }
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "11,12")]
+)
+
+(define_insn "stack_push"
+ [(set:SI (reg:SI SP_REG)
+ (minus:SI (reg:SI SP_REG)
+ (const_int 4)))
+ (set:SI (mem:SI (reg:SI SP_REG))
+ (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "push.l\t%0"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "stack_pushm"
+ [(match_parallel 1 "rx_store_multiple_vector"
+ [(set:SI (reg:SI SP_REG)
+ (minus:SI (reg:SI SP_REG)
+ (match_operand:SI
+ 0 "const_int_operand" "n")))])]
+ "reload_completed"
+ {
+ rx_emit_stack_pushm (operands);
+ return "";
+ }
+ [(set_attr "length" "2")
+   (set_attr "timings" "44")]	;; The timing is a guesstimate of the average case.
+)
+
+(define_insn "stack_pop"
+ [(set:SI (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (reg:SI SP_REG)))
+ (set:SI (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int 4)))]
+ ""
+ "pop\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "12")]
+)
+
+(define_insn "stack_popm"
+ [(match_parallel 1 "rx_load_multiple_vector"
+ [(set:SI (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI
+ 0 "const_int_operand" "n")))])]
+ "reload_completed"
+ {
+ rx_emit_stack_popm (operands, true);
+ return "";
+ }
+ [(set_attr "length" "2")
+   (set_attr "timings" "45")]	;; The timing is a guesstimate of the average case.
+)
+
+(define_insn "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (match_operator:SI
+ 1 "comparison_operator"
+ [(match_operand:SI
+ 2 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:SI
+ 3 "rx_source_operand" "r,Uint04,Int08,Sint16,Sint24,i,Q")]))]
+ ""
+ {
+ rx_float_compare_mode = false;
+ return "cmp\t%Q3, %Q2\n\tsc%B1.L\t%0";
+ }
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "22,22,22,22,22,22,44")
+ (set_attr "length" "5,5,6,7,8,9,8")]
+)
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand")
+ (if_then_else:SI (match_operand:SI 1 "comparison_operator")
+ (match_operand:SI 2 "nonmemory_operand")
+ (match_operand:SI 3 "immediate_operand")))]
+ ""
+ {
+ if (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE)
+ FAIL;
+ if (! CONST_INT_P (operands[3]))
+ FAIL;
+ }
+)
+
+(define_insn "*movsieq"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (if_then_else:SI (eq (match_operand:SI
+ 3 "register_operand" "r,r,r")
+ (match_operand:SI
+ 4 "rx_source_operand" "riQ,riQ,riQ"))
+ (match_operand:SI
+ 1 "nonmemory_operand" "0,i,r")
+ (match_operand:SI
+ 2 "immediate_operand" "i,i,i")))]
+ ""
+ "@
+ cmp\t%Q4, %Q3\n\tstnz\t%2, %0
+ cmp\t%Q4, %Q3\n\tmov.l\t%2, %0\n\tstz\t%1, %0
+ cmp\t%Q4, %Q3\n\tmov.l\t%1, %0\n\tstnz\t%2, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "length" "13,19,15")
+ (set_attr "timings" "22,33,33")]
+)
+
+(define_insn "*movsine"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (if_then_else:SI (ne (match_operand:SI 3 "register_operand" "r,r,r")
+ (match_operand:SI 4 "rx_source_operand" "riQ,riQ,riQ"))
+ (match_operand:SI 1 "nonmemory_operand" "0,i,r")
+ (match_operand:SI 2 "immediate_operand" "i,i,i")))]
+ ""
+ "@
+ cmp\t%Q4, %Q3\n\tstz\t%2, %0
+ cmp\t%Q4, %Q3\n\tmov.l\t%2, %0\n\tstnz\t%1, %0
+ cmp\t%Q4, %Q3\n\tmov.l\t%1, %0\n\tstz\t%2, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "length" "13,19,15")
+ (set_attr "timings" "22,33,33")]
+)
+
+;; Arithmetic Instructions
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (abs:SI (match_operand:SI 1 "register_operand" "0,r")))]
+ ""
+ "@
+ abs\t%0
+ abs\t%1, %0"
+ [(set_attr "cc" "set_zso")
+ (set_attr "length" "2,3")]
+)
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand"
+ "=r,r,r,r,r,r,r,r,r,r,r,r")
+ (plus:SI (match_operand:SI
+ 1 "register_operand"
+ "%0,0,0,0,0,0,r,r,r,r,r,0")
+ (match_operand:SI
+ 2 "rx_source_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i,r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "@
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%Q2, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "11,11,11,11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,3,3,4,5,6,5")]
+)
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:DI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "add\t%L2, %L0\n\tadc\t%H2, %H0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "22,22,22,22,22,44")
+ (set_attr "length" "5,7,9,11,13,11")]
+)
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,0,Q")
+ (match_operand:SI
+ 2 "rx_source_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i,r,Q,0")))]
+ ""
+ "@
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %1, %0
+ and\t%Q2, %0
+ and\t%Q1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "11,11,11,11,11,11,11,33,33")
+ (set_attr "length" "2,2,3,4,5,6,3,5,5")]
+)
+
+;; Byte swap (single 32-bit value).
+(define_insn "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "revl\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Byte swap (single 16-bit value).  Note - we ignore the swapping of the high 16 bits.
+(define_insn "bswaphi2"
+ [(set (match_operand:HI 0 "register_operand" "+r")
+ (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "revw\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (div:SI (match_operand:SI 1 "register_operand" "0,0,0,0,0,0")
+ (match_operand:SI
+ 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "div\t%Q2, %0"
+ [(set_attr "cc" "clobber")
+ (set_attr "timings" "1111") ;; Strictly speaking the timing should be
+			       ;; 2222, but that is a worst-case scenario.
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0,0,0,0,0,0")
+ (match_operand:SI
+ 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "divu\t%Q2, %0"
+ [(set_attr "cc" "clobber")
+ (set_attr "timings" "1010") ;; Strictly speaking the timing should be
+			       ;; 2020, but that is a worst-case scenario.
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+;; Note - these patterns are suppressed in big-endian mode because they
+;; generate a little-endian result, i.e. the most significant word of the
+;; result is placed in the higher numbered register of the destination
+;; register pair.
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
+ (mult:DI (sign_extend:DI (match_operand:SI
+ 1 "register_operand" "%0,0,0,0,0,0"))
+ (sign_extend:DI (match_operand:SI
+ 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q"))))]
+ "! TARGET_BIG_ENDIAN_DATA"
+ "@
+ emul\t%Q2, %0
+ emul\t%Q2, %0
+ emul\t%Q2, %0
+ emul\t%Q2, %0
+ emul\t%Q2, %0
+ emul\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6")
+ (set_attr "timings" "22,22,22,22,22,44")]
+)
+
+;; See comment for mulsidi3.
+;; Note - the zero_extends are to distinguish this pattern from the
+;; mulsidi3 pattern. Immediate mode addressing is not supported
+;; because gcc cannot handle the expression: (zero_extend (const_int)).
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand"
+ "=r,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand"
+ "%0,0"))
+ (zero_extend:DI (match_operand:SI 2 "rx_compare_operand"
+ "r,Q"))))]
+ "! TARGET_BIG_ENDIAN_DATA"
+ "@
+ emulu\t%Q2, %0
+ emulu\t%Q2, %0"
+ [(set_attr "length" "3,6")
+ (set_attr "timings" "22,44")]
+)
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (smax:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "max\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6")
+ (set_attr "timings" "11,11,11,11,11,33")]
+)
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q,r")))]
+ ""
+ "@
+ min\t%Q2, %0
+ min\t%Q2, %0
+ min\t%Q2, %0
+ min\t%Q2, %0
+ min\t%Q2, %0
+ min\t%Q2, %0
+ mov.l\t%1,%0\n\tmin\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6,5")
+ (set_attr "timings" "11,11,11,11,11,33,22")]
+)
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,0,Q,r")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i,Q,0,r")))]
+ ""
+ "@
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%Q1, %0
+ mul\t%Q2, %1, %0"
+ [(set_attr "length" "2,2,3,4,5,6,5,5,3")
+ (set_attr "timings" "11,11,11,11,11,11,33,33,11")]
+)
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,r")))]
+ ;; The NEG instruction does not comply with -fwrapv semantics.
+ ;; See gcc.c-torture/execute/pr22493-1.c for an example of this.
+ "! flag_wrapv"
+ "@
+ neg\t%0
+ neg\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (not:SI (match_operand:SI 1 "register_operand" "0,r")))]
+ ""
+ "@
+ not\t%0
+ not\t%1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "length" "2,3")]
+)
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,0,Q")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i,r,Q,0")))]
+ ""
+ "@
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %1, %0
+ or\t%Q2, %0
+ or\t%Q1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "11,11,11,11,11,11,11,33,33")
+ (set_attr "length" "2,2,3,4,5,6,3,5,5")]
+)
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))]
+ ""
+ "rotl\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "length" "3")]
+)
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))]
+ ""
+ "rotr\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "length" "3")]
+)
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))]
+ ""
+ "@
+ shar\t%2, %0
+ shar\t%2, %0
+ shar\t%2, %1, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "length" "3,2,3")]
+)
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))]
+ ""
+ "@
+ shlr\t%2, %0
+ shlr\t%2, %0
+ shlr\t%2, %1, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "length" "3,2,3")]
+)
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))]
+ ""
+ "@
+ shll\t%2, %0
+ shll\t%2, %0
+ shll\t%2, %1, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "length" "3,2,3")]
+)
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,n,r,Q")))]
+ ""
+ "@
+ sub\t%2, %0
+ sub\t%2, %0
+ add\t%N2, %0
+ sub\t%2, %1, %0
+ sub\t%Q2, %0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "11,11,11,11,33")
+ (set_attr "length" "2,2,6,3,5")]
+)
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:DI 2 "rx_source_operand" "r,Q")))]
+ ""
+ "sub\t%L2, %L0\n\tsbb\t%H2, %H0"
+ [(set_attr "cc" "set_zsoc")
+ (set_attr "timings" "22,44")
+ (set_attr "length" "5,11")]
+)
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "@
+ xor\t%Q2, %0
+ xor\t%Q2, %0
+ xor\t%Q2, %0
+ xor\t%Q2, %0
+ xor\t%Q2, %0
+ xor\t%Q2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "11,11,11,11,11,33")
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+;; Floating Point Instructions
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (plus:SF (match_operand:SF 1 "register_operand" "%0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "@
+ fadd\t%2, %0
+ fadd\t%2, %0
+ fadd\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "44,44,66")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (div:SF (match_operand:SF 1 "register_operand" "0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "fdiv\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "1616,1616,1818")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (mult:SF (match_operand:SF 1 "register_operand" "%0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "@
+ fmul\t%2, %0
+ fmul\t%2, %0
+ fmul\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "33,33,55")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (minus:SF (match_operand:SF 1 "register_operand" "0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "fsub\t%2, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "44,44,66")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:SF 1 "rx_compare_operand" "r,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "ftoi\t%1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "22,44")
+ (set_attr "length" "3,5")]
+)
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=r,r")
+ (float:SF (match_operand:SI 1 "rx_compare_operand" "r,Q")))]
+ "ALLOW_RX_FPU_INSNS"
+ "itof\t%1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "22,44")
+ (set_attr "length" "3,6")]
+)
+
+;; Bit manipulation instructions.
+;; Note - there are two versions of each pattern because the memory
+;; accessing versions use QImode whilst the register accessing
+;; versions use SImode.
+;; The peepholes are here because the combiner only looks at a maximum
+;; of three instructions at a time.
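+;;
+;; For instance (a hedged illustration), source such as:
+;;
+;;   unsigned int set_bit (unsigned int w, int n) { return w | (1u << n); }
+;;
+;; can reach the combiner as the three-insn load-1/shift/ior sequence
+;; that the first peephole below collapses into a single bset.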
+
+(define_insn "bitset"
+ [(set:SI (match_operand:SI 0 "register_operand" "+r")
+ (ior:SI (match_operand:SI 1 "register_operand" "0")
+ (ashift:SI (const_int 1)
+ (match_operand:SI 2 "nonmemory_operand" "ri"))))]
+ ""
+ "bset\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "bitset_in_memory"
+ [(set:QI (match_operand:QI 0 "memory_operand" "+m")
+ (ior:QI (match_operand:QI 1 "memory_operand" "0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 2 "nonmemory_operand" "ri"))))]
+ ""
+ "bset\t%2, %0.B"
+ [(set_attr "length" "3")
+ (set_attr "timings" "34")]
+)
+
+;; (set (reg A) (const_int 1))
+;; (set (reg A) (ashift (reg A) (reg B)))
+;; (set (reg C) (ior (reg A) (reg C)))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int 1))
+ (set:SI (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_operand:SI 2 "register_operand" "")
+ (ior:SI (match_dup 0)
+ (match_dup 2)))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (ior:SI (match_dup 2)
+ (ashift:SI (const_int 1)
+ (match_dup 1))))]
+)
+
+;; (set (reg A) (const_int 1))
+;; (set (reg A) (ashift (reg A) (reg B)))
+;; (set (reg A) (ior (reg A) (reg C)))
+;; (set (reg C) (reg A))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int 1))
+ (set:SI (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_dup 0)
+ (ior:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set:SI (match_dup 2) (match_dup 0))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (ior:SI (match_dup 2)
+ (ashift:SI (const_int 1)
+ (match_dup 1))))]
+)
+
+(define_insn "bitinvert"
+ [(set:SI (match_operand:SI 0 "register_operand" "+r")
+ (xor:SI (match_operand:SI 1 "register_operand" "0")
+ (ashift:SI (const_int 1)
+ (match_operand:SI 2 "nonmemory_operand" "ri"))))]
+ ""
+ "bnot\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "bitinvert_in_memory"
+ [(set:QI (match_operand:QI 0 "memory_operand" "+m")
+ (xor:QI (match_operand:QI 1 "register_operand" "0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 2 "nonmemory_operand" "ri"))))]
+ ""
+ "bnot\t%2, %0.B"
+ [(set_attr "length" "5")
+ (set_attr "timings" "33")]
+)
+
+;; (set (reg A) (const_int 1))
+;; (set (reg A) (ashift (reg A) (reg B)))
+;; (set (reg C) (xor (reg A) (reg C)))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int 1))
+ (set:SI (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_operand:SI 2 "register_operand" "")
+ (xor:SI (match_dup 0)
+ (match_dup 2)))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (xor:SI (match_dup 2)
+ (ashift:SI (const_int 1)
+ (match_dup 1))))]
+ ""
+)
+
+;; (set (reg A) (const_int 1))
+;; (set (reg A) (ashift (reg A) (reg B)))
+;; (set (reg A) (xor (reg A) (reg C)))
+;; (set (reg C) (reg A))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int 1))
+ (set:SI (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_dup 0)
+ (xor:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set:SI (match_dup 2) (match_dup 0))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (xor:SI (match_dup 2)
+ (ashift:SI (const_int 1)
+ (match_dup 1))))]
+ ""
+)
+
+(define_insn "bitclr"
+ [(set:SI (match_operand:SI 0 "register_operand" "+r")
+ (and:SI (match_operand:SI 1 "register_operand" "0")
+ (not:SI (ashift:SI (const_int 1)
+ (match_operand:SI 2 "nonmemory_operand" "ri")))))]
+ ""
+ "bclr\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "bitclr_in_memory"
+ [(set:QI (match_operand:QI 0 "memory_operand" "+m")
+ (and:QI (match_operand:QI 1 "memory_operand" "0")
+ (not:QI (ashift:QI (const_int 1)
+ (match_operand:QI 2 "nonmemory_operand" "ri")))))]
+ ""
+ "bclr\t%2, %0.B"
+ [(set_attr "length" "3")
+ (set_attr "timings" "34")]
+)
+
+;; (set (reg A) (const_int -2))
+;; (set (reg A) (rotate (reg A) (reg B)))
+;; (set (reg C) (and (reg A) (reg C)))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int -2))
+ (set:SI (match_dup 0)
+ (rotate:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_operand:SI 2 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_dup 2)))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (and:SI (match_dup 2)
+ (not:SI (ashift:SI (const_int 1)
+ (match_dup 1)))))]
+)
+
+;; (set (reg A) (const_int -2))
+;; (set (reg A) (rotate (reg A) (reg B)))
+;; (set (reg A) (and (reg A) (reg C)))
+;; (set (reg C) (reg A))
+(define_peephole2
+ [(set:SI (match_operand:SI 0 "register_operand" "")
+ (const_int -2))
+ (set:SI (match_dup 0)
+ (rotate:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "")))
+ (set:SI (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set:SI (match_dup 2) (match_dup 0))]
+ "dead_or_set_p (insn, operands[0])"
+ [(set:SI (match_dup 2)
+ (and:SI (match_dup 2)
+ (not:SI (ashift:SI (const_int 1)
+ (match_dup 1)))))]
+)
+
+(define_expand "insv"
+ [(set:SI (zero_extract:SI (match_operand:SI
+ 0 "nonimmediate_operand") ;; Destination
+ (match_operand
+ 1 "immediate_operand") ;; # of bits to set
+ (match_operand
+ 2 "immediate_operand")) ;; Starting bit
+ (match_operand
+ 3 "immediate_operand"))] ;; Bits to insert
+ ""
+ {
+ if (rx_expand_insv (operands))
+ DONE;
+ FAIL;
+ }
+)
+
+;; Atomic exchange operation.
+
+(define_insn "sync_lock_test_and_setsi"
+ [(set:SI (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 1 "rx_compare_operand" "=r,Q"))
+ (set:SI (match_dup 1)
+ (match_operand:SI 2 "register_operand" "0,0"))]
+ ""
+ "xchg\t%1, %0"
+ [(set_attr "length" "3,6")
+ (set_attr "timings" "22")]
+)
+
+;; Block move functions.
+
+(define_expand "movstr"
+ [(set:SI (match_operand:BLK 1 "memory_operand") ;; Dest
+ (match_operand:BLK 2 "memory_operand")) ;; Source
+ (use (match_operand:SI 0 "register_operand")) ;; Updated Dest
+ ]
+ ""
+ {
+ rtx addr1 = gen_rtx_REG (SImode, 1);
+ rtx addr2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+ rtx dest_copy = gen_reg_rtx (SImode);
+
+ emit_move_insn (len, GEN_INT (-1));
+ emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+ emit_move_insn (dest_copy, addr1);
+ emit_insn (gen_rx_movstr ());
+ emit_move_insn (len, GEN_INT (-1));
+ emit_insn (gen_rx_strend (operands[0], dest_copy));
+ DONE;
+ }
+)
+
+(define_insn "rx_movstr"
+ [(set:SI (mem:BLK (reg:SI 1))
+ (mem:BLK (reg:SI 2)))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVSTR)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ ]
+ ""
+ "smovu"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_insn "rx_strend"
+ [(set:SI (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")
+ (reg:SI 3)] UNSPEC_STRLEN))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ ]
+ ""
+ "mov\t%1, r1\n\tmov\t#0, r2\n\tsuntil.b\n\tmov\tr1, %0\n\tsub\t#1, %0"
+ [(set_attr "length" "10")
+ (set_attr "cc" "clobber")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "movmemsi"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand") ;; Dest
+ (match_operand:BLK 1 "memory_operand")) ;; Source
+ (use (match_operand:SI 2 "register_operand")) ;; Length in bytes
+ (match_operand 3 "immediate_operand") ;; Align
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)]
+ )]
+ ""
+ {
+ rtx addr1 = gen_rtx_REG (SImode, 1);
+ rtx addr2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ if (REG_P (operands[0]) && (REGNO (operands[0]) == 2
+ || REGNO (operands[0]) == 3))
+ FAIL;
+ if (REG_P (operands[1]) && (REGNO (operands[1]) == 1
+ || REGNO (operands[1]) == 3))
+ FAIL;
+ if (REG_P (operands[2]) && (REGNO (operands[2]) == 1
+ || REGNO (operands[2]) == 2))
+ FAIL;
+ emit_move_insn (addr1, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[2], NULL_RTX));
+ operands[0] = replace_equiv_address_nv (operands[0], addr1);
+ operands[1] = replace_equiv_address_nv (operands[1], addr2);
+ emit_insn (gen_rx_movmem ());
+ DONE;
+ }
+)
+
+(define_insn "rx_movmem"
+ [(set (mem:BLK (reg:SI 1))
+ (mem:BLK (reg:SI 2)))
+ (use (reg:SI 3))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "smovf"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "setmemsi"
+ [(set (match_operand:BLK 0 "memory_operand") ;; Dest
+ (match_operand:QI 2 "nonmemory_operand")) ;; Value
+ (use (match_operand:SI 1 "nonmemory_operand")) ;; Length
+ (match_operand 3 "immediate_operand") ;; Align
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_SETMEM)]
+ ""
+ {
+ rtx addr = gen_rtx_REG (SImode, 1);
+ rtx val = gen_rtx_REG (QImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (addr, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[1], NULL_RTX));
+ emit_move_insn (val, operands[2]);
+ emit_insn (gen_rx_setmem ());
+ DONE;
+ }
+)
+
+(define_insn "rx_setmem"
+ [(set:BLK (mem:BLK (reg:SI 1)) (reg 2))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_SETMEM)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 3))]
+ ""
+ "sstr.b"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI
+ 0 "register_operand") ;; Result
+ (unspec_volatile:SI [(match_operand:BLK
+ 1 "memory_operand") ;; String1
+ (match_operand:BLK
+ 2 "memory_operand")] ;; String2
+ UNSPEC_CMPSTRN))
+ (use (match_operand:SI
+ 3 "register_operand")) ;; Max Length
+ (match_operand:SI
+ 4 "immediate_operand")] ;; Known Align
+ ""
+ {
+ rtx str1 = gen_rtx_REG (SImode, 1);
+ rtx str2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (str1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[3], NULL_RTX));
+
+ emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+)
+
+(define_expand "cmpstrsi"
+ [(set (match_operand:SI
+ 0 "register_operand") ;; Result
+ (unspec_volatile:SI [(match_operand:BLK
+ 1 "memory_operand") ;; String1
+ (match_operand:BLK
+ 2 "memory_operand")] ;; String2
+ UNSPEC_CMPSTRN))
+ (match_operand:SI
+ 3 "immediate_operand")] ;; Known Align
+ ""
+ {
+ rtx str1 = gen_rtx_REG (SImode, 1);
+ rtx str2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (str1, force_reg (SImode, XEXP (operands[1], 0)));
+ emit_move_insn (str2, force_reg (SImode, XEXP (operands[2], 0)));
+ emit_move_insn (len, GEN_INT (-1));
+
+ emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "rx_cmpstrn"
+ [(set:SI (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(reg:SI 1) (reg:SI 2) (reg:SI 3)]
+ UNSPEC_CMPSTRN))
+ (use (match_operand:BLK 1 "memory_operand" "m"))
+ (use (match_operand:BLK 2 "memory_operand" "m"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "scmpu ; Perform the string comparison
+ mov #-1, %0 ; Set up -1 result (which cannot be created
+ ; by the SC insn)
+ bnc ?+ ; If Carry is not set skip over
+ scne.L %0 ; Set result based on Z flag
+?:
+"
+ [(set_attr "length" "9")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+;; Builtin Functions
+;;
+;; GCC does not have the ability to generate the following instructions
+;; on its own so they are provided as builtins instead. To use them from
+;; a program for example invoke them as __builtin_rx_<insn_name>. For
+;; example:
+;;
+;; int short_byte_swap (int arg) { return __builtin_rx_revw (arg); }
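+;;
+;; As a further hedged sketch (the argument and return types below are
+;; assumptions inferred from the accumulator patterns that follow, not
+;; documented signatures):
+;;
+;;   void set_acc_high (int v) { __builtin_rx_mvtachi (v); }
+;;   int get_acc_high (void) { return __builtin_rx_mvfachi (); }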
+
+;;---------- Accumulator Support ------------------------
+
+;; Multiply & Accumulate (high)
+(define_insn "machi"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MACHI)]
+ ""
+ "machi\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply & Accumulate (low)
+(define_insn "maclo"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MACLO)]
+ ""
+ "maclo\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply (high)
+(define_insn "mulhi"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MULHI)]
+ ""
+ "mulhi\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply (low)
+(define_insn "mullo"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MULLO)]
+ ""
+ "mullo\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Move from Accumulator (high)
+(define_insn "mvfachi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)]
+ UNSPEC_BUILTIN_MVFACHI))]
+ ""
+ "mvfachi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move from Accumulator (middle)
+(define_insn "mvfacmi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)]
+ UNSPEC_BUILTIN_MVFACMI))]
+ ""
+ "mvfacmi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to Accumulator (high)
+(define_insn "mvtachi"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACHI)]
+ ""
+ "mvtachi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to Accumulator (low)
+(define_insn "mvtaclo"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACLO)]
+ ""
+ "mvtaclo\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Round Accumulator
+(define_insn "racw"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_RACW)]
+ ""
+ "racw\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Repeat multiply and accumulate
+(define_insn "rmpa"
+ [(unspec:SI [(const_int 0) (reg:SI 1) (reg:SI 2) (reg:SI 3)
+ (reg:SI 4) (reg:SI 5) (reg:SI 6)]
+ UNSPEC_BUILTIN_RMPA)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "rmpa"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1010")]
+)
+
+;;---------- Arithmetic ------------------------
+
+;; Byte swap (two 16-bit values).
+(define_insn "revw"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_REVW))]
+ ""
+ "revw\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Round to integer.
+(define_insn "lrintsf2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "rx_compare_operand" "r,Q")]
+ UNSPEC_BUILTIN_ROUND))]
+ ""
+ "round\t%1, %0"
+ [(set_attr "cc" "set_zs")
+ (set_attr "timings" "22,44")
+ (set_attr "length" "3,5")]
+)
+
+;; Saturate to 32-bits
+(define_insn "sat"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")]
+ UNSPEC_BUILTIN_SAT))]
+ ""
+ "sat\t%0"
+ [(set_attr "length" "2")]
+)
+
+;;---------- Control Registers ------------------------
+
+;; Clear Processor Status Word
+(define_insn "clrpsw"
+ [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_CLRPSW)
+ (clobber (cc0))]
+ ""
+ "clrpsw\t%F0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")]
+)
+
+;; Set Processor Status Word
+(define_insn "setpsw"
+ [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_SETPSW)
+ (clobber (cc0))]
+ ""
+ "setpsw\t%F0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")]
+)
+
+;; Move from control register
+(define_insn "mvfc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_MVFC))]
+ ""
+ "mvfc\t%C1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to control register
+(define_insn "mvtc"
+ [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i")
+ (match_operand:SI 1 "nonmemory_operand" "r,i")]
+ UNSPEC_BUILTIN_MVTC)]
+ ""
+ "mvtc\t%1, %C0"
+ [(set_attr "length" "3,7")]
+ ;; Ignore possible clobbering of the comparison flags in the
+ ;; PSW register. This is a cc0 target so any cc0 setting
+ ;; instruction will always be paired with a cc0 user, without
+ ;; the possibility of this instruction being placed in between
+ ;; them.
+)
+
+;; Move to interrupt priority level
+(define_insn "mvtipl"
+ [(unspec:SI [(match_operand:SI 0 "immediate_operand" "Uint04")]
+ UNSPEC_BUILTIN_MVTIPL)]
+ ""
+ "mvtipl\t%0"
+ [(set_attr "length" "3")]
+)
+
+;;---------- Interrupts ------------------------
+
+;; Break
+(define_insn "brk"
+ [(unspec_volatile [(const_int 0)]
+ UNSPEC_BUILTIN_BRK)]
+ ""
+ "brk"
+ [(set_attr "length" "1")
+ (set_attr "timings" "66")]
+)
+
+;; Interrupt
+(define_insn "int"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_INT)]
+ ""
+ "int\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Wait
+(define_insn "wait"
+ [(unspec_volatile [(const_int 0)]
+ UNSPEC_BUILTIN_WAIT)]
+ ""
+ "wait"
+ [(set_attr "length" "2")]
+)
+
+;;---------- CoProcessor Support ------------------------
+
+;; FIXME: The instructions are currently commented out because
+;; the bit patterns have not been finalized, so the assembler
+;; does not support them. Once they are decided and the assembler
+;; supports them, enable the instructions here.
+
+;; Move from co-processor register
+(define_insn "mvfcp"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_MVFCP))]
+ ""
+ "; mvfcp\t%1, %0, %2"
+ [(set_attr "length" "5")]
+)
+
+;;---------- Misc ------------------------
+
+;; Required by cfglayout.c...
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "1")]
+)
diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt
new file mode 100644
index 00000000000..768d565b478
--- /dev/null
+++ b/gcc/config/rx/rx.opt
@@ -0,0 +1,98 @@
+; Command line options for the Renesas RX port of GCC.
+; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+; Contributed by Red Hat.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+;---------------------------------------------------
+
+m32bit-doubles
+Target RejectNegative Mask(32BIT_DOUBLES)
+Store doubles in 32 bits.
+
+m64bit-doubles
+Target RejectNegative InverseMask(32BIT_DOUBLES)
+Store doubles in 64 bits. This is the default.
+
+fpu
+Target RejectNegative Mask(32BIT_DOUBLES) MaskExists
+Enable the use of RX FPU instructions.
+
+nofpu
+Target RejectNegative InverseMask(32BIT_DOUBLES) MaskExists
+Disable the use of RX FPU instructions.
+
+;---------------------------------------------------
+
+mcpu=
+Target RejectNegative Joined Var(rx_cpu_name)
+Specify the target RX CPU type.
+
+patch=
+Target RejectNegative Joined Var(rx_cpu_name)
+Alias for -mcpu.
+
+;---------------------------------------------------
+
+mbig-endian-data
+Target RejectNegative Mask(BIG_ENDIAN_DATA)
+Data is stored in big-endian format.
+
+mlittle-endian-data
+Target RejectNegative InverseMask(BIG_ENDIAN_DATA)
+Data is stored in little-endian format. This is the default.
+
+;---------------------------------------------------
+
+msmall-data-limit=
+Target RejectNegative Joined UInteger Var(rx_small_data_limit) Init(0)
+Maximum size of global and static variables which can be placed into the small data area.
+
+;---------------------------------------------------
+
+msim
+Target
+Use the simulator runtime.
+
+;---------------------------------------------------
+
+mas100-syntax
+Target Mask(AS100_SYNTAX)
+Generate assembler output that is compatible with the Renesas AS100 assembler. This may restrict some of the compiler's capabilities. The default is to generate GAS compatible syntax.
+
+;---------------------------------------------------
+
+mrelax
+Target
+Enable linker relaxation.
+
+;---------------------------------------------------
+
+mmax-constant-size=
+Target RejectNegative Joined UInteger Var(rx_max_constant_size) Init(0)
+Maximum size in bytes of constant values allowed as operands.
+
+;---------------------------------------------------
+
+mint-register=
+Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0)
+Specifies the number of registers to reserve for interrupt handlers.
+
+;---------------------------------------------------
+
+msave-acc-in-interrupts
+Target Mask(SAVE_ACC_REGISTER)
+Specifies whether interrupt functions should save and restore the accumulator register.
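+
+;---------------------------------------------------
+
+; A hypothetical invocation combining several of the options above
+; (the driver name and the CPU name are assumptions):
+;
+;   rx-elf-gcc -mcpu=RX610 -m32bit-doubles -msmall-data-limit=256 \
+;              -mint-register=2 -msave-acc-in-interrupts foo.c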
diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx
new file mode 100644
index 00000000000..eb1ca48d3a3
--- /dev/null
+++ b/gcc/config/rx/t-rx
@@ -0,0 +1,32 @@
+# Makefile fragment for building GCC for the Renesas RX target.
+# Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+# Contributed by Red Hat.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+# the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Enable multilibs:
+
+MULTILIB_OPTIONS = m32bit-doubles mbig-endian-data
+MULTILIB_DIRNAMES = 32fp big-endian-data
+MULTILIB_MATCHES = m32bit-doubles=fpu
+MULTILIB_EXCEPTIONS =
+MULTILIB_EXTRA_OPTS =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
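+
+# As a sketch of the result (per the usual GCC multilib semantics), the
+# two option sets above combine into four library variants:
+#
+#   .                       default: 64-bit doubles, little-endian data
+#   32fp                    -m32bit-doubles (also chosen for -fpu, via
+#                           MULTILIB_MATCHES)
+#   big-endian-data         -mbig-endian-data
+#   32fp/big-endian-data    -m32bit-doubles -mbig-endian-data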
diff --git a/gcc/config/s390/2097.md b/gcc/config/s390/2097.md
index eb7240effd4..56893596a74 100644
--- a/gcc/config/s390/2097.md
+++ b/gcc/config/s390/2097.md
@@ -57,7 +57,8 @@
z10_int_fr_A3"
"z10_other_super, z10_other_super_c_E1, z10_other_super_E1, \
z10_int_super, z10_int_super_E1, \
- z10_lr, z10_store_super")
+ z10_lr, z10_store_super"
+ " ! s390_agen_dep_p")
; Forwarding from z10_super to frz10_ and z10_rec.
@@ -68,7 +69,8 @@
z10_store_super"
"z10_int_fr, z10_int_fr_E1, z10_int_fr_A3, \
z10_other_fr, z10_other_fr_A3, z10_lr_fr, z10_lr_fr_E1, \
- z10_other_fr_E1, z10_store_rec")
+ z10_other_fr_E1, z10_store_rec"
+ " ! s390_agen_dep_p")
; Forwarding from z10_fwd and z10_fr to z10_rec and z10_fr.
@@ -84,7 +86,8 @@
z10_int_fr_A3"
"z10_int_fr, z10_int_fr_E1, z10_int_fr_A3, \
z10_other_fr, z10_other_fr_A3, z10_lr_fr, z10_lr_fr_E1, \
- z10_other_fr_E1, z10_store_rec")
+ z10_other_fr_E1, z10_store_rec"
+ " ! s390_agen_dep_p")
;
@@ -205,15 +208,12 @@
(and (eq_attr "type" "lr")
(eq_attr "z10prop" "z10_fr")))
"z10_e1_ANY, z10_Gate_ANY")
-; "z10_e1_ANY")
(define_insn_reservation "z10_lr_fr_E1" 6
(and (eq_attr "cpu" "z10")
(and (eq_attr "type" "lr")
(eq_attr "z10prop" "z10_fr_E1")))
"z10_e1_ANY, z10_Gate_ANY")
-; "z10_e1_ANY")
-
(define_insn_reservation "z10_la" 6
(and (eq_attr "cpu" "z10")
@@ -227,14 +227,12 @@
(and (eq_attr "type" "la")
(eq_attr "z10prop" "z10_fwd")))
"z10_e1_ANY, z10_Gate_ANY")
-; "z10_e1_ANY")
(define_insn_reservation "z10_la_fwd_A1" 6
(and (eq_attr "cpu" "z10")
(and (eq_attr "type" "la")
(eq_attr "z10prop" "z10_fwd_A1")))
"z10_e1_ANY, z10_Gate_ANY")
-; "z10_e1_ANY")
; larl-type instructions
@@ -666,13 +664,14 @@
; Address-related bypasses
;
-; Here is the cycle diagram for Address-related bypasses:
+; Here is the cycle diagram for address-related bypasses:
; ... G1 G2 G3 A0 A1 A2 A3 E1 P1 P2 P3 R0 ...
-; ^ ^ ^ ^ ^
-; | | | | E1-type bypasses provide the new addr AFTER this cycle
-; | | | A3-type bypasses provide the new addr AFTER this cycle
-; | | A1-type bypasses provide the new addr AFTER this cycle
-; | AGI resolution, actual USE of address is DURING this cycle
+; ^ ^ ^ ^ ^ ^
+; | | | | | without a bypass, the new value is available AFTER this cycle
+; | | | | E1-type bypasses provide the new value AFTER this cycle
+; | | | A3-type bypasses provide the new value AFTER this cycle
+; | | A1-type bypasses provide the new value AFTER this cycle
+; | AGI resolution, actual USE of new value is DURING this cycle
; AGI detection
(define_bypass 3 "z10_larl_A1, z10_la_fwd_A1, z10_other_fwd_A1, \
@@ -682,7 +681,6 @@
z10_cs, z10_stm, z10_other"
"s390_agen_dep_p")
-
(define_bypass 5 "z10_larl_fwd_A3, z10_load_fwd_A3, z10_other_fwd_A3, \
z10_other_fr_A3, z10_int_fwd_A3, z10_int_fr_A3"
"z10_agen, z10_la, z10_branch, z10_call, z10_load, \
@@ -699,6 +697,12 @@
z10_cs, z10_stm, z10_other"
"s390_agen_dep_p")
+(define_bypass 9 "z10_int_super, z10_int_fwd, z10_int_fr"
+ "z10_agen, z10_la, z10_branch, z10_call, z10_load, \
+ z10_store, \
+ z10_cs, z10_stm, z10_other"
+ "s390_agen_dep_p")
+
;
diff --git a/gcc/config/s390/fixdfdi.h b/gcc/config/s390/fixdfdi.h
deleted file mode 100644
index ddddf3a7c9c..00000000000
--- a/gcc/config/s390/fixdfdi.h
+++ /dev/null
@@ -1,462 +0,0 @@
-/* Definitions of target machine for GNU compiler, for IBM S/390
- Copyright (C) 1999, 2000, 2001, 2007, 2008 Free Software Foundation, Inc.
- Contributed by Hartmut Penner (hpenner@de.ibm.com) and
- Ulrich Weigand (uweigand@de.ibm.com).
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#ifdef L_fixunstfdi
-
-#define EXPD(fp) (((fp.l.i[0]) >> 16) & 0x7FFF)
-#define EXPONENT_BIAS 16383
-#define MANTISSA_BITS 112
-#define PRECISION (MANTISSA_BITS + 1)
-#define SIGNBIT 0x80000000
-#define SIGND(fp) ((fp.l.i[0]) & SIGNBIT)
-#define MANTD_HIGH_LL(fp) ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
-#define MANTD_LOW_LL(fp) (fp.ll[1])
-#define FRACD_ZERO_P(fp) (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
-#define HIGH_LL_FRAC_BITS 48
-#define HIGH_LL_UNIT_BIT ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
-#define HIGH_LL_FRAC_MASK (HIGH_LL_UNIT_BIT - 1)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union double_long {
- long double d;
- struct {
- SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
- } l;
- UDItype_x ll[2]; /* 64 bit parts: 0 upper, 1 lower */
-};
-
-UDItype_x __fixunstfdi (long double a1);
-
-/* convert double to unsigned int */
-UDItype_x
-__fixunstfdi (long double a1)
-{
- register union double_long dl1;
- register int exp;
- register UDItype_x l;
-
- dl1.d = a1;
-
- /* +/- 0, denormalized, negative */
- if (!EXPD (dl1) || SIGND(dl1))
- return 0;
-
- /* The exponent - considered the binary point at the right end of
- the mantissa. */
- exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
-
- /* number < 1: If the mantissa would need to be right-shifted more bits than
- its size (plus the implied one bit on the left) the result would be
- zero. */
- if (exp <= -PRECISION)
- return 0;
-
- /* NaN: All exponent bits set and a nonzero fraction. */
- if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
- return 0x0ULL;
-
- /* One extra bit is needed for the unit bit which is appended by
- MANTD_HIGH_LL on the left of the mantissa. */
- exp += HIGH_LL_FRAC_BITS + 1;
-
- /* If the result would still need a left shift it will be too large
- to be represented. */
- if (exp > 0)
- return 0xFFFFFFFFFFFFFFFFULL;
-
- l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
- | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
-
- return l >> -exp;
-}
-#define __fixunstfdi ___fixunstfdi
-#endif
-#undef L_fixunstfdi
-
-#ifdef L_fixtfdi
-#define EXPD(fp) (((fp.l.i[0]) >> 16) & 0x7FFF)
-#define EXPONENT_BIAS 16383
-#define MANTISSA_BITS 112
-#define PRECISION (MANTISSA_BITS + 1)
-#define SIGNBIT 0x80000000
-#define SIGND(fp) ((fp.l.i[0]) & SIGNBIT)
-#define MANTD_HIGH_LL(fp) ((fp.ll[0] & HIGH_LL_FRAC_MASK) | HIGH_LL_UNIT_BIT)
-#define MANTD_LOW_LL(fp) (fp.ll[1])
-#define FRACD_ZERO_P(fp) (!fp.ll[1] && !(fp.ll[0] & HIGH_LL_FRAC_MASK))
-#define HIGH_LL_FRAC_BITS 48
-#define HIGH_LL_UNIT_BIT ((UDItype_x)1 << HIGH_LL_FRAC_BITS)
-#define HIGH_LL_FRAC_MASK (HIGH_LL_UNIT_BIT - 1)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union double_long {
- long double d;
- struct {
- SItype_x i[4]; /* 32 bit parts: 0 upper ... 3 lowest */
- } l;
- UDItype_x ll[2]; /* 64 bit parts: 0 upper, 1 lower */
-};
-
-DItype_x __fixtfdi (long double a1);
-
-/* convert double to unsigned int */
-DItype_x
-__fixtfdi (long double a1)
-{
- register union double_long dl1;
- register int exp;
- register UDItype_x l;
-
- dl1.d = a1;
-
- /* +/- 0, denormalized */
- if (!EXPD (dl1))
- return 0;
-
- /* The exponent - considered the binary point at the right end of
- the mantissa. */
- exp = EXPD (dl1) - EXPONENT_BIAS - MANTISSA_BITS;
-
- /* number < 1: If the mantissa would need to be right-shifted more bits than
- its size the result would be zero. */
- if (exp <= -PRECISION)
- return 0;
-
- /* NaN: All exponent bits set and a nonzero fraction. */
- if ((EXPD(dl1) == 0x7fff) && !FRACD_ZERO_P (dl1))
- return 0x8000000000000000ULL;
-
- /* One extra bit is needed for the unit bit which is appended by
- MANTD_HIGH_LL on the left of the mantissa. */
- exp += HIGH_LL_FRAC_BITS + 1;
-
- /* If the result would still need a left shift it will be too large
- to be represented. Compared to the unsigned variant we have to
- take care that there is still space for the sign bit to be
- applied. So we can only go on if there is a right-shift by one
- or more. */
- if (exp >= 0)
- {
- l = 1ULL << 63; /* long long min */
- return SIGND (dl1) ? l : l - 1;
- }
-
- l = MANTD_LOW_LL (dl1) >> (HIGH_LL_FRAC_BITS + 1)
- | MANTD_HIGH_LL (dl1) << (64 - (HIGH_LL_FRAC_BITS + 1));
-
- return SIGND (dl1) ? -(l >> -exp) : l >> -exp;
-}
-#define __fixtfdi ___fixtfdi
-#endif
-#undef L_fixtfdi
-
-#ifdef L_fixunsdfdi
-#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
-#define EXCESSD 1022
-#define SIGNBIT 0x80000000
-#define SIGND(fp) ((fp.l.upper) & SIGNBIT)
-#define MANTD_LL(fp) ((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
-#define FRACD_LL(fp) (fp.ll & (HIDDEND_LL-1))
-#define HIDDEND_LL ((UDItype_x)1 << 52)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union double_long {
- double d;
- struct {
- SItype_x upper;
- USItype_x lower;
- } l;
- UDItype_x ll;
-};
-
-UDItype_x __fixunsdfdi (double a1);
-
-/* convert double to unsigned int */
-UDItype_x
-__fixunsdfdi (double a1)
-{
- register union double_long dl1;
- register int exp;
- register UDItype_x l;
-
- dl1.d = a1;
-
- /* +/- 0, denormalized, negative */
-
- if (!EXPD (dl1) || SIGND(dl1))
- return 0;
-
- exp = EXPD (dl1) - EXCESSD - 53;
-
- /* number < 1 */
-
- if (exp < -53)
- return 0;
-
- /* NaN */
-
- if ((EXPD(dl1) == 0x7ff) && (FRACD_LL(dl1) != 0)) /* NaN */
- return 0x0ULL;
-
- /* Number big number & + inf */
-
- if (exp >= 12) {
- return 0xFFFFFFFFFFFFFFFFULL;
- }
-
- l = MANTD_LL(dl1);
-
- /* shift down until exp < 12 or l = 0 */
- if (exp > 0)
- l <<= exp;
- else
- l >>= -exp;
-
- return l;
-}
-#define __fixunsdfdi ___fixunsdfdi
-#endif
-#undef L_fixunsdfdi
-
-#ifdef L_fixdfdi
-#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
-#define EXCESSD 1022
-#define SIGNBIT 0x80000000
-#define SIGND(fp) ((fp.l.upper) & SIGNBIT)
-#define MANTD_LL(fp) ((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
-#define FRACD_LL(fp) (fp.ll & (HIDDEND_LL-1))
-#define HIDDEND_LL ((UDItype_x)1 << 52)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union double_long {
- double d;
- struct {
- SItype_x upper;
- USItype_x lower;
- } l;
- UDItype_x ll;
-};
-
-DItype_x __fixdfdi (double a1);
-
-/* convert double to int */
-DItype_x
-__fixdfdi (double a1)
-{
- register union double_long dl1;
- register int exp;
- register DItype_x l;
-
- dl1.d = a1;
-
- /* +/- 0, denormalized */
-
- if (!EXPD (dl1))
- return 0;
-
- exp = EXPD (dl1) - EXCESSD - 53;
-
- /* number < 1 */
-
- if (exp < -53)
- return 0;
-
- /* NaN */
-
- if ((EXPD(dl1) == 0x7ff) && (FRACD_LL(dl1) != 0)) /* NaN */
- return 0x8000000000000000ULL;
-
- /* Number big number & +/- inf */
-
- if (exp >= 11) {
- l = (long long)1<<63;
- if (!SIGND(dl1))
- l--;
- return l;
- }
-
- l = MANTD_LL(dl1);
-
- /* shift down until exp < 12 or l = 0 */
- if (exp > 0)
- l <<= exp;
- else
- l >>= -exp;
-
- return (SIGND (dl1) ? -l : l);
-}
-#define __fixdfdi ___fixdfdi
-#endif
-#undef L_fixdfdi
-
-#ifdef L_fixunssfdi
-#define EXP(fp) (((fp.l) >> 23) & 0xFF)
-#define EXCESS 126
-#define SIGNBIT 0x80000000
-#define SIGN(fp) ((fp.l) & SIGNBIT)
-#define HIDDEN (1 << 23)
-#define MANT(fp) (((fp.l) & 0x7FFFFF) | HIDDEN)
-#define FRAC(fp) ((fp.l) & 0x7FFFFF)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union float_long
- {
- float f;
- USItype_x l;
- };
-
-UDItype_x __fixunssfdi (float a1);
-
-/* convert float to unsigned int */
-UDItype_x
-__fixunssfdi (float a1)
-{
- register union float_long fl1;
- register int exp;
- register UDItype_x l;
-
- fl1.f = a1;
-
- /* +/- 0, denormalized, negative */
-
- if (!EXP (fl1) || SIGN(fl1))
- return 0;
-
- exp = EXP (fl1) - EXCESS - 24;
-
- /* number < 1 */
-
- if (exp < -24)
- return 0;
-
- /* NaN */
-
- if ((EXP(fl1) == 0xff) && (FRAC(fl1) != 0)) /* NaN */
- return 0x0ULL;
-
- /* Number big number & + inf */
-
- if (exp >= 41) {
- return 0xFFFFFFFFFFFFFFFFULL;
- }
-
- l = MANT(fl1);
-
- if (exp > 0)
- l <<= exp;
- else
- l >>= -exp;
-
- return l;
-}
-#define __fixunssfdi ___fixunssfdi
-#endif
-#undef L_fixunssfdi
-
-#ifdef L_fixsfdi
-#define EXP(fp) (((fp.l) >> 23) & 0xFF)
-#define EXCESS 126
-#define SIGNBIT 0x80000000
-#define SIGN(fp) ((fp.l) & SIGNBIT)
-#define HIDDEN (1 << 23)
-#define MANT(fp) (((fp.l) & 0x7FFFFF) | HIDDEN)
-#define FRAC(fp) ((fp.l) & 0x7FFFFF)
-
-typedef int DItype_x __attribute__ ((mode (DI)));
-typedef unsigned int UDItype_x __attribute__ ((mode (DI)));
-typedef int SItype_x __attribute__ ((mode (SI)));
-typedef unsigned int USItype_x __attribute__ ((mode (SI)));
-
-union float_long
- {
- float f;
- USItype_x l;
- };
-
-DItype_x __fixsfdi (float a1);
-
-/* convert double to int */
-DItype_x
-__fixsfdi (float a1)
-{
- register union float_long fl1;
- register int exp;
- register DItype_x l;
-
- fl1.f = a1;
-
- /* +/- 0, denormalized */
-
- if (!EXP (fl1))
- return 0;
-
- exp = EXP (fl1) - EXCESS - 24;
-
- /* number < 1 */
-
- if (exp < -24)
- return 0;
-
- /* NaN */
-
- if ((EXP(fl1) == 0xff) && (FRAC(fl1) != 0)) /* NaN */
- return 0x8000000000000000ULL;
-
- /* Number big number & +/- inf */
-
- if (exp >= 40) {
- l = (long long)1<<63;
- if (!SIGN(fl1))
- l--;
- return l;
- }
-
- l = MANT(fl1);
-
- if (exp > 0)
- l <<= exp;
- else
- l >>= -exp;
-
- return (SIGN (fl1) ? -l : l);
-}
-#define __fixsfdi ___fixsfdi
-#endif
-#undef L_fixsfdi
diff --git a/gcc/config/s390/libgcc-glibc.ver b/gcc/config/s390/libgcc-glibc.ver
deleted file mode 100644
index 6fc52e40d78..00000000000
--- a/gcc/config/s390/libgcc-glibc.ver
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc.
-#
-# This file is part of GCC.
-#
-# GCC is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GCC is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCC; see the file COPYING3. If not see
-# <http://www.gnu.org/licenses/>.
-
-# In order to work around the very problems that force us to now generally
-# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
-# By now choosing the same version tags for these specific routines, we
-# maintain enough binary compatibility to allow future versions of glibc
-# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
-
-# Note that we cannot use the default libgcc-glibc.ver file on s390x,
-# because GLIBC_2.0 does not exist on this architecture, as the first
-# ever glibc release on the platform was GLIBC_2.2.
-
-%ifndef __s390x__
-%exclude {
- __divdi3
- __moddi3
- __udivdi3
- __umoddi3
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-
-%inherit GCC_3.0 GLIBC_2.0
-GLIBC_2.0 {
- __divdi3
- __moddi3
- __udivdi3
- __umoddi3
-
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-%endif
-
-%ifdef __s390x__
-%exclude {
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-
-%inherit GCC_3.0 GLIBC_2.2
-GLIBC_2.2 {
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-%endif
-
-# With GCC 4.1.0 long double 128 bit support was introduced. The
-# following symbols coming from libgcc are enabled when -mlong-double-128
-# is specified. These lines make the symbols to get a @@GCC_4.1.0 attached.
-
-%exclude {
- __divtc3
- __multc3
- __powitf2
- __fixtfti
- __fixunstfti
- __floattitf
-
- __fixtfdi
- __fixunstfdi
- __floatditf
-}
-
-GCC_4.1.0 {
- __divtc3
- __multc3
- __powitf2
-
-%ifdef __s390x__
- __fixtfti
- __fixunstfti
- __floattitf
-
-%else
- __fixtfdi
- __fixunstfdi
- __floatditf
-%endif
-}
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index e439b01709f..a4334819203 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -9003,6 +9003,7 @@ s390_encode_section_info (tree decl, rtx rtl, int first)
&& GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
&& TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
&& (MEM_ALIGN (rtl) == 0
+ || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
|| MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
}
@@ -9863,9 +9864,12 @@ s390_z10_optimize_cmp (rtx insn)
if (!REG_P (*op0) || !REG_P (*op1))
return false;
+ if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
+ return false;
+
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
- prev_insn = PREV_INSN (insn);
+ prev_insn = prev_active_insn (insn);
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
s390_swap_cmp (cond, op0, op1, insn);
@@ -9876,7 +9880,7 @@ s390_z10_optimize_cmp (rtx insn)
the operands, or if swapping them would cause a conflict
with the previous insn, issue a NOP after the COMPARE in
order to separate the two instructions. */
- next_insn = NEXT_INSN (insn);
+ next_insn = next_active_insn (insn);
if (next_insn != NULL_RTX && INSN_P (next_insn)
&& s390_non_addr_reg_read_p (*op1, next_insn))
{
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index ffb96cd0f34..2da8b8753e2 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -24,12 +24,6 @@ along with GCC; see the file COPYING3. If not see
#ifndef _S390_H
#define _S390_H
-/* Override the __fixdfdi etc. routines when building libgcc2.
- ??? This should be done in a cleaner way ... */
-#if defined (IN_LIBGCC2) && !defined (__s390x__)
-#include <config/s390/fixdfdi.h>
-#endif
-
/* Which processor to generate code or schedule for. The cpu attribute
defines a list that mirrors this list, so changes to s390.md must be
made at the same time. */
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index db326ee766c..8f4a71feb3f 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -220,7 +220,7 @@
;; reg: Instruction does not use the agen unit
(define_attr "atype" "agen,reg"
- (if_then_else (eq_attr "op_type" "E,RR,RI,RRE")
+ (if_then_else (eq_attr "op_type" "E,RR,RI,RRE,RSI,RIL,RIE,RRF,RRR")
(const_string "reg")
(const_string "agen")))
@@ -8941,18 +8941,16 @@
;
(define_insn "prefetch"
- [(prefetch (match_operand 0 "address_operand" "ZQZS,ZRZT,X")
- (match_operand:SI 1 "const_int_operand" " n, n,n")
- (match_operand:SI 2 "const_int_operand" " n, n,n"))]
- "TARGET_ZARCH && s390_tune == PROCESSOR_2097_Z10"
+ [(prefetch (match_operand 0 "address_operand" "ZQZRZSZT,X")
+ (match_operand:SI 1 "const_int_operand" " n,n")
+ (match_operand:SI 2 "const_int_operand" " n,n"))]
+ "TARGET_Z10"
{
switch (which_alternative)
{
case 0:
- return INTVAL (operands[1]) == 1 ? "stcmh\t2,0,%a0" : "stcmh\t1,0,%a0";
- case 1:
return INTVAL (operands[1]) == 1 ? "pfd\t2,%a0" : "pfd\t1,%a0";
- case 2:
+ case 1:
if (larl_operand (operands[0], Pmode))
return INTVAL (operands[1]) == 1 ? "pfdrl\t2,%a0" : "pfdrl\t1,%a0";
default:
@@ -8963,10 +8961,9 @@
return "";
}
}
- [(set_attr "type" "store,load,larl")
- (set_attr "op_type" "RSY,RXY,RIL")
- (set_attr "z10prop" "z10_super")
- (set_attr "cpu_facility" "*,z10,z10")])
+ [(set_attr "type" "load,larl")
+ (set_attr "op_type" "RXY,RIL")
+ (set_attr "z10prop" "z10_super")])
;
diff --git a/gcc/config/s390/t-crtstuff b/gcc/config/s390/t-crtstuff
deleted file mode 100644
index 39b0eba6b97..00000000000
--- a/gcc/config/s390/t-crtstuff
+++ /dev/null
@@ -1,5 +0,0 @@
-# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
-# because then __FRAME_END__ might not be the last thing in .eh_frame
-# section.
-CRTSTUFF_T_CFLAGS = -fno-asynchronous-unwind-tables
-TARGET_LIBGCC2_CFLAGS += -mlong-double-128
diff --git a/gcc/config/s390/t-linux b/gcc/config/s390/t-linux
deleted file mode 100644
index d5a92781450..00000000000
--- a/gcc/config/s390/t-linux
+++ /dev/null
@@ -1,3 +0,0 @@
-# Override t-slibgcc-elf-ver to export some libgcc symbols with
-# the symbol versions that glibc used.
-SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver $(srcdir)/config/s390/libgcc-glibc.ver
diff --git a/gcc/config/s390/t-linux64 b/gcc/config/s390/t-linux64
index 0ffb6902c18..36aced09c2c 100644
--- a/gcc/config/s390/t-linux64
+++ b/gcc/config/s390/t-linux64
@@ -1,8 +1,3 @@
MULTILIB_OPTIONS = m64/m31
MULTILIB_DIRNAMES = 64 32
MULTILIB_OSDIRNAMES = ../lib64 ../lib
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
-
-EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
diff --git a/gcc/config/s390/t-tpf b/gcc/config/s390/t-tpf
deleted file mode 100644
index 6e4c377697c..00000000000
--- a/gcc/config/s390/t-tpf
+++ /dev/null
@@ -1,9 +0,0 @@
-# Compile crtbeginS.o and crtendS.o with pic.
-CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
-# Compile libgcc2.a with pic.
-TARGET_LIBGCC2_CFLAGS = -fPIC
-
-# Use unwind-dw2-fde-glibc.
-LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c \
- $(srcdir)/unwind-sjlj.c $(srcdir)/gthr-gnat.c $(srcdir)/unwind-c.c
-LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h
diff --git a/gcc/config/s390/tpf.h b/gcc/config/s390/tpf.h
index 5ffbd07f309..455c8ad92bc 100644
--- a/gcc/config/s390/tpf.h
+++ b/gcc/config/s390/tpf.h
@@ -55,7 +55,7 @@ along with GCC; see the file COPYING3. If not see
enable TPF profiling support and the standard backchain by default. */
#undef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_TPF_PROFILING | MASK_64BIT | MASK_ZARCH \
- | MASK_HARD_FLOAT | MASK_BACKCHAIN)
+ | MASK_HARD_DFP | MASK_BACKCHAIN)
/* Exception handling. */
diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h
index 0b7af7b2739..cde9c222546 100644
--- a/gcc/config/score/score.h
+++ b/gcc/config/score/score.h
@@ -688,9 +688,6 @@ typedef struct score_args
#define HAVE_PRE_MODIFY_REG 0
#define HAVE_POST_MODIFY_REG 0
-/* Recognize any constant value that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Maximum number of registers that can appear in a valid memory address. */
#define MAX_REGS_PER_ADDRESS 1
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 679cf11e83e..26bceea670d 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -222,7 +222,9 @@ static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (const_tree);
static void sh_init_builtins (void);
+static tree sh_builtin_decl (unsigned, bool);
static void sh_media_init_builtins (void);
+static tree sh_media_builtin_decl (unsigned, bool);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
@@ -416,6 +418,8 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL sh_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin
@@ -9427,6 +9431,7 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_GOTPLT
|| XINT (x, 1) == UNSPEC_GOTTPOFF
|| XINT (x, 1) == UNSPEC_DTPOFF
+ || XINT (x, 1) == UNSPEC_TPOFF
|| XINT (x, 1) == UNSPEC_PLT
|| XINT (x, 1) == UNSPEC_SYMOFF
|| XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
@@ -10520,6 +10525,7 @@ struct builtin_description
const enum insn_code icode;
const char *const name;
int signature;
+ tree fndecl;
};
/* describe number and signedness of arguments; arg[0] == result
@@ -10586,99 +10592,99 @@ static const char signature_args[][4] =
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
/* mshards_q: returns signed short. */
/* nsb: takes long long arg, returns unsigned char. */
-static const struct builtin_description bdesc[] =
-{
- { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
- { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
- { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
- { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
- { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
- { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
- { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
- { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
- { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
- { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
- { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
- { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
- { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
- { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
- { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
- { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
- { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
- { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
- { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
- { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
- { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
- { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
- { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
- { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
- { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
- { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
- { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
- { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
- { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
- { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
- { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
- { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
- { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
- { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
- { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
- { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
- { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
- { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
- { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
- { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
- { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
- { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
- { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
- { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
- { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
- { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
- { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
- { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
- { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
- { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
- { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
- { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
- { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
- { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
- { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
- { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
- { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
- { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
- { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
- { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
- { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
- { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
- { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
- { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
- { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
- { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
- { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
- { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
- { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
- { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
- { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
- { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
- { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
- { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
+static struct builtin_description bdesc[] =
+{
+ { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
+ { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
+ { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
+ { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
+ { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
+ { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
+ { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
+ { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
+ { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
+ { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
+ { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
+ { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
+ { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
+ { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
+ { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
+ { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
+ { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
+ { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
+ { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
+ { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
+ { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
+ { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
+ { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
+ { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
+ { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
+ { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
+ { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
+ { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
+ { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
+ { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
+ { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
+ { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
};
static void
sh_media_init_builtins (void)
{
tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
- const struct builtin_description *d;
+ struct builtin_description *d;
memset (shared, 0, sizeof shared);
for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
@@ -10724,11 +10730,23 @@ sh_media_init_builtins (void)
if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
shared[signature] = type;
}
- add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
- NULL, NULL_TREE);
+ d->fndecl =
+ add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
+ NULL, NULL_TREE);
}
}
+/* Returns the shmedia builtin decl for CODE. */
+
+static tree
+sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= ARRAY_SIZE (bdesc))
+ return error_mark_node;
+
+ return bdesc[code].fndecl;
+}
+
/* Implements target hook vector_mode_supported_p. */
bool
sh_vector_mode_supported_p (enum machine_mode mode)
@@ -10767,6 +10785,17 @@ sh_init_builtins (void)
sh_media_init_builtins ();
}
+/* Returns the sh builtin decl for CODE. */
+
+static tree
+sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SHMEDIA)
+ return sh_media_builtin_decl (code, initialize_p);
+
+ return error_mark_node;
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index f06098bfc1b..7fa634777b3 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -6824,8 +6824,8 @@ label:
;; jump around the unconditional jump because it was out of range.
(define_insn "stuff_delay_slot"
[(set (pc)
- (unspec [(match_operand:SI 0 "const_int_operand" "") (pc)] UNSPEC_BBR))
- (set (reg:SI T_REG) (match_operand:SI 1 "const_int_operand" ""))]
+ (unspec [(match_operand:SI 0 "const_int_operand" "") (pc)
+ (match_operand:SI 1 "const_int_operand" "")] UNSPEC_BBR))]
"TARGET_SH1"
""
[(set_attr "length" "0")
@@ -6889,8 +6889,6 @@ label:
"TARGET_SHMEDIA"
"
{
- /* hack to generate same code. */
- rtx tmp_di = GET_CODE (operands[0]) == UNORDERED ? NULL : gen_reg_rtx (DImode);
rtx tmp = gen_reg_rtx (SImode);
rtx cmp;
if (GET_CODE (operands[0]) == NE)
@@ -6900,13 +6898,12 @@ label:
operands[1], operands[2]);
emit_insn (gen_cstore4_media (tmp, cmp, operands[1], operands[2]));
- if (tmp_di) emit_insn (gen_extendsidi2 (tmp_di, tmp)); else tmp_di = tmp;
if (GET_CODE (cmp) == GET_CODE (operands[0]))
- operands[0] = gen_rtx_NE (VOIDmode, tmp_di, const0_rtx);
+ operands[0] = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
else
- operands[0] = gen_rtx_EQ (VOIDmode, tmp_di, const0_rtx);
- operands[1] = tmp_di;
+ operands[0] = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ operands[1] = tmp;
operands[2] = const0_rtx;
operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]);
}")
diff --git a/gcc/config/sparc/sol2-unwind.h b/gcc/config/sparc/sol2-unwind.h
new file mode 100644
index 00000000000..c98dc4dc088
--- /dev/null
+++ b/gcc/config/sparc/sol2-unwind.h
@@ -0,0 +1,458 @@
+/* DWARF2 EH unwinding support for SPARC Solaris.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <ucontext.h>
+
+#if defined(__arch64__)
+
+#define MD_FALLBACK_FRAME_STATE_FOR sparc64_fallback_frame_state
+
+static _Unwind_Reason_Code
+sparc64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ void *pc = context->ra;
+ void *this_cfa = context->cfa;
+ void *new_cfa, *ra_location, *shifted_ra_location;
+ int regs_off;
+ int fpu_save_off;
+ unsigned char fpu_save;
+ int i;
+
+ /* This is the observed pattern for the sigacthandler in Solaris 8. */
+ unsigned int sigacthandler_sol8_pattern []
+ = {0x9401400f, 0xca5aafa0, 0x913e2000, 0x892a3003,
+ 0xe0590005, 0x9fc40000, 0x9410001a, 0x80a6e008};
+
+ /* This is the observed pattern for the sigacthandler in Solaris 9. */
+ unsigned int sigacthandler_sol9_pattern []
+ = {0xa33e2000, 0x00000000, 0x892c7003, 0x90100011,
+ 0xe0590005, 0x9fc40000, 0x9410001a, 0x80a46008};
+
+ /* This is the observed pattern for the __sighndlr. */
+ unsigned int sighndlr_pattern []
+ = {0x9de3bf50, 0x90100018, 0x92100019, 0x9fc6c000,
+ 0x9410001a, 0x81c7e008, 0x81e80000};
+
+ /* Deal with a frameless function from which a signal was raised. */
+ if (_Unwind_IsSignalFrame (context))
+ {
+ /* The CFA is by definition unmodified in this case. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = 0;
+
+ /* This is the canonical RA column. */
+ fs->retaddr_column = 15;
+
+ return _URC_NO_REASON;
+ }
+
+ /* Look for the sigacthandler pattern. The pattern changes slightly
+ in different versions of the operating system, so we skip the
+ comparison against pc-(4*6) for Solaris 9. */
+ if (( *(unsigned int *)(pc-(4*7)) == sigacthandler_sol8_pattern[0]
+ && *(unsigned int *)(pc-(4*6)) == sigacthandler_sol8_pattern[1]
+ && *(unsigned int *)(pc-(4*5)) == sigacthandler_sol8_pattern[2]
+ && *(unsigned int *)(pc-(4*4)) == sigacthandler_sol8_pattern[3]
+ && *(unsigned int *)(pc-(4*3)) == sigacthandler_sol8_pattern[4]
+ && *(unsigned int *)(pc-(4*2)) == sigacthandler_sol8_pattern[5]
+ && *(unsigned int *)(pc-(4*1)) == sigacthandler_sol8_pattern[6]
+ && *(unsigned int *)(pc-(4*0)) == sigacthandler_sol8_pattern[7] ) ||
+ ( *(unsigned int *)(pc-(4*7)) == sigacthandler_sol9_pattern[0]
+ /* skip pc-(4*6) */
+ && *(unsigned int *)(pc-(4*5)) == sigacthandler_sol9_pattern[2]
+ && *(unsigned int *)(pc-(4*4)) == sigacthandler_sol9_pattern[3]
+ && *(unsigned int *)(pc-(4*3)) == sigacthandler_sol9_pattern[4]
+ && *(unsigned int *)(pc-(4*2)) == sigacthandler_sol9_pattern[5]
+ && *(unsigned int *)(pc-(4*1)) == sigacthandler_sol9_pattern[6]
+ && *(unsigned int *)(pc-(4*0)) == sigacthandler_sol9_pattern[7] ) )
+ /* We need to move up two frames (the kernel frame and the handler
+ frame). Minimum stack frame size is 176 bytes (128 + 48): 128
+ bytes for spilling the register window (16 extended words for the
+ in and local registers), and 6 extended words to store at least
+ 6 arguments to callees. The kernel frame and the sigacthandler
+ both have this minimal stack. The ucontext_t structure is after
+ this offset. */
+ regs_off = 176 + 176;
+
+ /* Look for the __sighndlr pattern. */
+ else if ( *(unsigned int *)(pc-(4*5)) == sighndlr_pattern[0]
+ && *(unsigned int *)(pc-(4*4)) == sighndlr_pattern[1]
+ && *(unsigned int *)(pc-(4*3)) == sighndlr_pattern[2]
+ && *(unsigned int *)(pc-(4*2)) == sighndlr_pattern[3]
+ && *(unsigned int *)(pc-(4*1)) == sighndlr_pattern[4]
+ && *(unsigned int *)(pc-(4*0)) == sighndlr_pattern[5]
+ && *(unsigned int *)(pc+(4*1)) == sighndlr_pattern[6] )
+ {
+ /* We have observed different calling frames among different
+ versions of the operating system, so we need to
+ discriminate using the upper frame. We look for the return
+ address of the caller frame (there is an offset of 15 double
+ words between the frame address and the place where this return
+ address is stored) in order to do some more pattern matching. */
+ unsigned int cuh_pattern
+ = *(unsigned int *)(*(unsigned long *)(this_cfa + 15*8) - 4);
+
+ if (cuh_pattern == 0x9410001a || cuh_pattern == 0x94100013)
+ /* This matches the call_user_handler pattern for Solaris 9 and
+ for Solaris 8 running inside Solaris Containers respectively.
+ We need to move up four frames (the kernel frame, the signal
+ frame, the call_user_handler frame, the __sighndlr frame).
+ Three of them have the minimum stack frame size (kernel,
+ signal, and __sighndlr frames) of 176 bytes, and there is
+ another with a stack frame of 304 bytes (the call_user_handler
+ frame). The ucontext_t structure is after this offset. */
+ regs_off = 176 + 176 + 176 + 304;
+ else
+ /* We need to move up three frames (the kernel frame, the
+ sigacthandler frame, and the __sighndlr frame). The kernel
+ frame has a stack frame size of 176 bytes, the __sighndlr frame
+ one of 304 bytes, and the sigacthandler frame one of 176 bytes.
+ The ucontext_t structure is after this
+ offset. */
+ regs_off = 176 + 304 + 176;
+ }
+
+ /* Exit if the pattern at the return address does not match any of
+ the three patterns above. */
+ else
+ return _URC_END_OF_STACK;
+
+ /* FPU information can be extracted from the ucontext_t structure
+ that is the third argument for the signal handler, that is saved
+ in the stack. There are 64 bytes between the beginning of the
+ ucontext_t argument of the signal handler and the uc_mcontext
+ field. There are 176 bytes between the beginning of uc_mcontext
+ and the beginning of the fpregs field. */
+ fpu_save_off = regs_off + (8*10) + 176;
+
+ /* The fpregs field contains 32 extended words at the beginning that
+ contain the fpu state. Then there are 2 extended words and two
+ bytes. */
+ fpu_save = *(unsigned char *)(this_cfa + fpu_save_off + (8*32) + (2*8) + 2);
+
+ /* We need to get the frame pointer for the kernel frame that
+ executes when the signal is raised. This frame immediately
+ follows the application code that generated the signal, so the
+ latter's stack pointer is the former's frame pointer.
+ The stack pointer for the interrupted application code can be
+ calculated from the ucontext_t structure (third argument for the
+ signal handler) that is saved in the stack. There are 10 words
+ between the beginning of the ucontext_t argument of the signal
+ handler and the uc_mcontext.gregs field that contains the
+ registers saved by the signal handler. */
+ new_cfa = *(void **)(this_cfa + regs_off + (8*10) + (REG_SP*8));
+ /* The frame address is %sp + STACK_BIAS in 64-bit mode. */
+ new_cfa += 2047;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - this_cfa;
+
+ /* Restore global and out registers (in this order) from the
+ ucontext_t structure, uc_mcontext.gregs field. */
+ for (i = 1; i < 16; i++)
+ {
+ /* We never restore %sp as everything is purely CFA-based. */
+ if ((unsigned int) i == __builtin_dwarf_sp_column ())
+ continue;
+
+ /* First the global registers and then the out registers. */
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = this_cfa + regs_off + (8*10) + ((REG_Y+i)*8) - new_cfa;
+ }
+
+ /* Just above the stack pointer there are 16 extended words in which
+ the register window (in and local registers) was saved. */
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i + 16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 16].loc.offset = i*8;
+ }
+
+ /* Check whether we need to restore fpu registers. */
+ if (fpu_save)
+ {
+ for (i = 0; i < 64; i++)
+ {
+ if (i > 32 && (i & 1))
+ continue;
+
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = this_cfa + fpu_save_off + (i*4) - new_cfa;
+ }
+ }
+
+ /* State the rules to find the kernel's code "return address", which is
+ the address of the active instruction when the signal was caught.
+ On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we
+ need to subtract it in advance from the purported return address. */
+ ra_location = this_cfa + regs_off + (8*10) + (REG_PC*8);
+ shifted_ra_location = this_cfa + regs_off + (8*10) + (REG_Y*8);
+ *(void **)shifted_ra_location = *(void **)ra_location - 8;
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT sparc64_frob_update_context
+
+static void
+sparc64_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ /* The column of %sp contains the old CFA, not the old value of %sp.
+ The CFA offset already comprises the stack bias so, when %sp is the
+ CFA register, we must avoid counting the stack bias twice. Do not
+ do that for signal frames as the offset is artificial for them. */
+ if (fs->regs.cfa_reg == __builtin_dwarf_sp_column ()
+ && fs->regs.cfa_how == CFA_REG_OFFSET
+ && fs->regs.cfa_offset != 0
+ && !fs->signal_frame)
+ context->cfa -= 2047;
+}
+
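To make the stack-bias handling above concrete, here is a minimal hosted sketch; the 2047-byte bias is the SPARC V9 ABI constant used in new_cfa += 2047, while the sample %sp value is hypothetical:

#include <stdint.h>
#include <stdio.h>

#define STACK_BIAS 2047	/* SPARC V9: %sp and %fp are biased by 2047 bytes.  */

int
main (void)
{
  uintptr_t biased_sp = 0x7ffff800;		/* hypothetical biased %sp */
  uintptr_t frame = biased_sp + STACK_BIAS;	/* true frame address */
  printf ("biased %%sp = %#lx, frame = %#lx\n",
	  (unsigned long) biased_sp, (unsigned long) frame);
  return 0;
}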
+#else
+
+#define MD_FALLBACK_FRAME_STATE_FOR sparc_fallback_frame_state
+
+static _Unwind_Reason_Code
+sparc_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ void *pc = context->ra;
+ void *this_cfa = context->cfa;
+ void *new_cfa, *ra_location, *shifted_ra_location;
+ int regs_off;
+ int fpu_save_off;
+ unsigned char fpu_save;
+ int i;
+
+ /* This is the observed pattern for the sigacthandler. */
+ unsigned int sigacthandler_pattern []
+ = {0x9602400f, 0x92100019, 0x00000000, 0x912e2002,
+ 0xe002000a, 0x90100018, 0x9fc40000, 0x9410001a,
+ 0x80a62008};
+
+ /* This is the observed pattern for the __libthread_segvhdlr. */
+ unsigned int segvhdlr_pattern []
+ = {0x94102000, 0xe007bfe4, 0x9010001c, 0x92100019,
+ 0x9fc40000, 0x9410001a, 0x81c7e008, 0x81e80000,
+ 0x80a26000};
+
+ /* This is the observed pattern for the __sighndlr. */
+ unsigned int sighndlr_pattern []
+ = {0x9de3bfa0, 0x90100018, 0x92100019, 0x9fc6c000,
+ 0x9410001a, 0x81c7e008, 0x81e80000};
+
+ /* Deal with a frameless function from which a signal was raised. */
+ if (_Unwind_IsSignalFrame (context))
+ {
+ /* The CFA is by definition unmodified in this case. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = 0;
+
+ /* This is the canonical RA column. */
+ fs->retaddr_column = 15;
+
+ return _URC_NO_REASON;
+ }
+
+ /* Look for the sigacthandler pattern. The pattern changes slightly
+ in different versions of the operating system, so we skip the
+ comparison against pc-(4*6). */
+ if ( *(unsigned int *)(pc-(4*8)) == sigacthandler_pattern[0]
+ && *(unsigned int *)(pc-(4*7)) == sigacthandler_pattern[1]
+ /* skip pc-(4*6) */
+ && *(unsigned int *)(pc-(4*5)) == sigacthandler_pattern[3]
+ && *(unsigned int *)(pc-(4*4)) == sigacthandler_pattern[4]
+ && *(unsigned int *)(pc-(4*3)) == sigacthandler_pattern[5]
+ && *(unsigned int *)(pc-(4*2)) == sigacthandler_pattern[6]
+ && *(unsigned int *)(pc-(4*1)) == sigacthandler_pattern[7]
+ && *(unsigned int *)(pc-(4*0)) == sigacthandler_pattern[8] )
+ /* We need to move up two frames (the kernel frame and the handler
+ frame). The minimum stack frame size is 96 bytes (64 + 4 + 24): 64
+ bytes for spilling the register window (16 words for the in and
+ local registers), 4 bytes for a pointer to space for callees
+ returning structs, and 24 bytes to store at least six arguments
+ to callees. The ucontext_t structure is after this offset. */
+ regs_off = 96 + 96;
+
+ /* Look for the __libthread_segvhdlr pattern. */
+ else if ( *(unsigned int *)(pc-(4*6)) == segvhdlr_pattern[0]
+ && *(unsigned int *)(pc-(4*5)) == segvhdlr_pattern[1]
+ && *(unsigned int *)(pc-(4*4)) == segvhdlr_pattern[2]
+ && *(unsigned int *)(pc-(4*3)) == segvhdlr_pattern[3]
+ && *(unsigned int *)(pc-(4*2)) == segvhdlr_pattern[4]
+ && *(unsigned int *)(pc-(4*1)) == segvhdlr_pattern[5]
+ && *(unsigned int *)(pc-(4*0)) == segvhdlr_pattern[6]
+ && *(unsigned int *)(pc+(4*1)) == segvhdlr_pattern[7]
+ && *(unsigned int *)(pc+(4*2)) == segvhdlr_pattern[8] )
+ /* We need to move up four frames (the kernel frame, the
+ sigacthandler frame, the __sighndlr frame, and the
+ __libthread_segvhdlr frame). Two of them have the minimum
+ stack frame size (kernel and __sighndlr frames) of 96 bytes,
+ another has a stack frame of 216 bytes (the sigacthandler frame),
+ and there is one more with a stack frame of 128 bytes (the
+ __libthread_segvhdlr frame). The ucontext_t structure is after
+ this offset. */
+ regs_off = 96 + 96 + 128 + 216;
+
+ /* Look for the __sighndlr pattern. */
+ else if ( *(unsigned int *)(pc-(4*5)) == sighndlr_pattern[0]
+ && *(unsigned int *)(pc-(4*4)) == sighndlr_pattern[1]
+ && *(unsigned int *)(pc-(4*3)) == sighndlr_pattern[2]
+ && *(unsigned int *)(pc-(4*2)) == sighndlr_pattern[3]
+ && *(unsigned int *)(pc-(4*1)) == sighndlr_pattern[4]
+ && *(unsigned int *)(pc-(4*0)) == sighndlr_pattern[5]
+ && *(unsigned int *)(pc+(4*1)) == sighndlr_pattern[6] )
+ {
+ /* We have observed different calling frames among different
+ versions of the operating system, so that we need to
+ discriminate using the upper frame. We look for the return
+ address of the caller frame (there is an offset of 15 words
+ between the frame address and the place where this return
+ address is stored) in order to do some more pattern matching. */
+ unsigned int cuh_pattern
+ = *(unsigned int *)(*(unsigned int *)(this_cfa + 15*4) - 4);
+
+ if (cuh_pattern == 0xd407a04c)
+ /* This matches the call_user_handler pattern for Solaris 10.
+ We need to move up three frames (the kernel frame, the
+ call_user_handler frame, the __sighndlr frame). Two of them
+ have the minimum stack frame size (kernel and __sighndlr
+ frames) of 96 bytes, and there is another with a stack frame
+ of 160 bytes (the call_user_handler frame). The ucontext_t
+ structure is after this offset. */
+ regs_off = 96 + 96 + 160;
+ else if (cuh_pattern == 0x9410001a || cuh_pattern == 0x9410001b)
+ /* This matches the call_user_handler pattern for Solaris 9 and
+ for Solaris 8 running inside Solaris Containers respectively.
+ We need to move up four frames (the kernel frame, the signal
+ frame, the call_user_handler frame, the __sighndlr frame).
+ Three of them have the minimum stack frame size (kernel,
+ signal, and __sighndlr frames) of 96 bytes, and there is
+ another with a stack frame of 160 bytes (the call_user_handler
+ frame). The ucontext_t structure is after this offset. */
+ regs_off = 96 + 96 + 96 + 160;
+ else
+ /* We need to move up three frames (the kernel frame, the
+ sigacthandler frame, and the __sighndlr frame). Two of them
+ have the minimum stack frame size (kernel and __sighndlr
+ frames) of 96 bytes, and there is another with a stack frame
+ of 216 bytes (the sigacthandler frame). The ucontext_t
+ structure is after this offset. */
+ regs_off = 96 + 96 + 216;
+ }
+
+ /* Exit if the pattern at the return address does not match the
+ previous three patterns. */
+ else
+ return _URC_END_OF_STACK;
+
+ /* FPU information can be extracted from the ucontext_t structure
+ that is the third argument of the signal handler and is saved on
+ the stack. There are 10 words between the beginning of the
+ ucontext_t argument of the signal handler and the uc_mcontext
+ field. There are 80 bytes between the beginning of uc_mcontext
+ and the beginning of the fpregs field. */
+ fpu_save_off = regs_off + (4*10) + (4*20);
+
+ /* The fpregs field contains 32 words at the beginning that contain
+ the fpu state. Then there are 2 words and two bytes. */
+ fpu_save = *(unsigned char *)(this_cfa + fpu_save_off + (4*32) + (2*4) + 2);
+
+ /* We need to get the frame pointer for the kernel frame that
+ executes when the signal is raised. This frame immediately
+ follows the application code that generated the signal, so the
+ latter's stack pointer is the former's frame pointer. The stack
+ pointer for the interrupted application code can be calculated
+ from the ucontext_t structure (third argument of the signal
+ handler) that is saved on the stack. There are 10 words between
+ the beginning of the ucontext_t argument of the signal handler
+ and the uc_mcontext.gregs field that contains the registers saved
+ by the signal handler. */
+ new_cfa = *(void **)(this_cfa + regs_off + (4*10) + (REG_SP*4));
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - this_cfa;
+
+ /* Restore global and out registers (in this order) from the
+ ucontext_t structure, uc_mcontext.gregs field. */
+ for (i = 1; i < 16; i++)
+ {
+ /* We never restore %sp as everything is purely CFA-based. */
+ if ((unsigned int) i == __builtin_dwarf_sp_column ())
+ continue;
+
+ /* First the global registers and then the out registers. */
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = this_cfa + regs_off + (4*10) + ((REG_Y+i)*4) - new_cfa;
+ }
+
+ /* Just above the stack pointer there are 16 words in which the
+ register window (in and local registers) was saved. */
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i + 16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 16].loc.offset = i*4;
+ }
+
+ /* Check whether we need to restore fpu registers. */
+ if (fpu_save)
+ {
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = this_cfa + fpu_save_off + (i*4) - new_cfa;
+ }
+ }
+
+ /* State the rules to find the kernel's code "return address", which is
+ the address of the active instruction when the signal was caught.
+ On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we
+ need to subtract it in advance from the purported return address. */
+ ra_location = this_cfa + regs_off + (4*10) + (REG_PC*4);
+ shifted_ra_location = this_cfa + regs_off + (4*10) + (REG_Y*4);
+ *(void **)shifted_ra_location = *(void **)ra_location - 8;
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#endif
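Both fallback routines rely on the same idea: read fixed 32-bit instruction words at known offsets around the return address and compare them against observed code patterns. A hosted sketch of that comparison, with a made-up instruction buffer standing in for the text segment:

#include <stdio.h>

/* Hypothetical instruction stream; the real unwinder reads the words
   straight from the text segment around the return address.  */
static const unsigned int code[] = { 0x9de3bfa0, 0x90100018, 0x92100019 };

/* Return nonzero if the N words ending at PC match PATTERN, mimicking
   the *(unsigned int *)(pc-(4*k)) comparisons above.  */
static int
matches (const unsigned int *pc, const unsigned int *pattern, int n)
{
  int k;
  for (k = 0; k < n; k++)
    if (pc[k - (n - 1)] != pattern[k])
      return 0;
  return 1;
}

int
main (void)
{
  static const unsigned int pattern[]
    = { 0x9de3bfa0, 0x90100018, 0x92100019 };
  printf ("match: %d\n", matches (&code[2], pattern, 3));
  return 0;
}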
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index 139f5b39dff..a3f7647027b 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -194,3 +194,5 @@ along with GCC; see the file COPYING3. If not see
(SIZE), (ALIGN) / BITS_PER_UNIT); \
} \
while (0)
+
+#define MD_UNWIND_SUPPORT "config/sparc/sol2-unwind.h"
diff --git a/gcc/config/spu/cache.S b/gcc/config/spu/cache.S
new file mode 100644
index 00000000000..9ffb6a0d194
--- /dev/null
+++ b/gcc/config/spu/cache.S
@@ -0,0 +1,43 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .data
+ .p2align 7
+ .global __cache
+__cache:
+ .rept __CACHE_SIZE__ * 8
+ .fill 128
+ .endr
+
+ .p2align 7
+ .global __cache_tag_array
+__cache_tag_array:
+ .rept __CACHE_SIZE__ * 2
+ .long 1, 1, 1, 1
+ .fill 128-16
+ .endr
+__end_cache_tag_array:
+
+ .globl __cache_tag_array_size
+ .set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array
+
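The .rept counts encode the cache geometry: assuming __CACHE_SIZE__ is the data-cache size in kilobytes (as the libgcc_cacheNNk library names suggest), the data array holds __CACHE_SIZE__ * 8 lines of 128 bytes, and the tag array holds one 128-byte tag block per 4-way set. A small sketch checking that arithmetic:

#include <assert.h>
#include <stdio.h>

#define KB 64			/* stands in for __CACHE_SIZE__ */
#define LINE_SIZE 128
#define WAYS 4

int
main (void)
{
  unsigned lines = KB * 8;	/* .rept __CACHE_SIZE__ * 8 */
  unsigned sets = KB * 2;	/* .rept __CACHE_SIZE__ * 2 */

  assert (lines * LINE_SIZE == KB * 1024);	/* data array is KB kilobytes */
  assert (sets * WAYS == lines);		/* one 4-way set per tag block */
  printf ("%u lines in %u sets\n", lines, sets);
  return 0;
}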
diff --git a/gcc/config/spu/cachemgr.c b/gcc/config/spu/cachemgr.c
new file mode 100644
index 00000000000..e7abd5e62db
--- /dev/null
+++ b/gcc/config/spu/cachemgr.c
@@ -0,0 +1,438 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+#include <spu_internals.h>
+#include <spu_intrinsics.h>
+#include <spu_cache.h>
+
+extern unsigned long long __ea_local_store;
+extern char __cache_tag_array_size;
+
+#define LINE_SIZE 128
+#define TAG_MASK (LINE_SIZE - 1)
+
+#define WAYS 4
+#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)
+
+#define CACHE_LINES ((int) &__cache_tag_array_size / \
+ sizeof (struct __cache_tag_array) * WAYS)
+
+struct __cache_tag_array
+{
+ unsigned int tag_lo[WAYS];
+ unsigned int tag_hi[WAYS];
+ void *base[WAYS];
+ int reserved[WAYS];
+ vector unsigned short dirty_bits[WAYS];
+};
+
+extern struct __cache_tag_array __cache_tag_array[];
+extern char __cache[];
+
+/* In order to make the code seem a little cleaner, and to avoid having
+ 64/32 bit ifdefs all over the place, we use macros. */
+
+#ifdef __EA64__
+typedef unsigned long long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \
+ && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))
+
+#define GET_TAG(_entry, _way) \
+ ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \
+ | (unsigned long long)(_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \
+ (_entry)->tag_hi[(_way)] = (_tag) >> 32
+
+#else /*__EA32__*/
+typedef unsigned long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == (_tag))
+
+#define GET_TAG(_entry, _way) \
+ ((_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag)
+
+#endif
+
+/* In GET_ENTRY, we cast away the high 32 bits,
+ as the tag is only in the low 32. */
+
+#define GET_ENTRY(_addr) \
+ ((struct __cache_tag_array *) \
+ si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
+ si_from_uint (SET_MASK)), \
+ si_from_uint ((unsigned int) __cache_tag_array))))
+
+#define GET_CACHE_LINE(_addr, _way) \
+ ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE));
+
+#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
+#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
+#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)
+
+#define LS_FLAG 0x80000000
+#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
+#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
+#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
+
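Under __EA64__ a 64-bit effective-address tag is split across the two 32-bit tag fields; a scalar sketch of what SET_TAG, GET_TAG and CHECK_TAG do for one way (the sample tag value is arbitrary):

#include <assert.h>
#include <stdint.h>

struct way { uint32_t tag_lo, tag_hi; };	/* one way's tag fields */

int
main (void)
{
  uint64_t tag = 0x123456789abcde80ULL;	/* hypothetical 64-bit EA tag */
  struct way w;
  uint64_t back;

  /* SET_TAG: split the tag across the two 32-bit fields.  */
  w.tag_lo = (uint32_t) (tag & 0xFFFFFFFF);
  w.tag_hi = (uint32_t) (tag >> 32);

  /* GET_TAG reassembles the halves; CHECK_TAG compares both.  */
  back = ((uint64_t) w.tag_hi << 32) | (uint64_t) w.tag_lo;
  assert (back == tag);
  return 0;
}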
+static int dma_tag = 32;
+
+static void
+__cache_evict_entry (struct __cache_tag_array *entry, int way)
+{
+ addr tag = GET_TAG (entry, way);
+
+ if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
+ {
+#ifdef NONATOMIC
+ /* Non-atomic writes. */
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#else
+ /* Allocate a buffer large enough that we know it contains 128 bytes
+ that are 128-byte aligned (for DMA). */
+
+ char buffer[LINE_SIZE + 127];
+ qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
+ qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ qword bits;
+ unsigned int mach_stat;
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ do
+ {
+ /* We atomically read the current memory into a buffer,
+ modify the dirty bytes in the buffer, and write it
+ back. If the writeback fails, loop and try again. */
+
+ mfc_getllar (buf_ptr, tag, 0, 0);
+ mfc_read_atomic_status ();
+
+ /* The method we use to write 16 dirty bytes into the buffer at a
+ time uses fsmb, which in turn uses the least significant 16 bits
+ of word 0, so we load the bits and rotate so that the first bit
+ of the bitmap lands in the first bit that fsmb will use. */
+
+ bits = (qword) entry->dirty_bits[way];
+ bits = si_rotqbyi (bits, -2);
+
+ /* si_fsmb creates the mask of dirty bytes.
+ Use selb to select the appropriate bytes. */
+ buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));
+
+ /* Rotate the bitmap to the next 16-byte section of the line. */
+ bits = si_rotqbyi (bits, 2);
+
+ buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+
+ mfc_putllc (buf_ptr, tag, 0, 0);
+ }
+ while (mfc_read_atomic_status ());
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#endif
+ }
+
+ /* In any case, mark the lo tag with 1, which denotes an empty way. */
+ SET_EMPTY (entry, way);
+ entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
+}
+
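The selb-based merge above can be read as a per-byte select driven by a dirty bitmap; a scalar sketch of the same operation, one byte at a time instead of 16 (buffer contents and bitmap are made up):

#include <stdio.h>

/* Copy only the dirty bytes of a cache line into a freshly read buffer;
   bit i of the bitmap (MSB first) marks byte i as dirty.  */
static void
merge_dirty (unsigned char *buf, const unsigned char *line,
	     const unsigned char *bitmap, int n)
{
  int i;
  for (i = 0; i < n; i++)
    if (bitmap[i / 8] & (0x80 >> (i % 8)))
      buf[i] = line[i];
}

int
main (void)
{
  unsigned char buf[8] = "AAAAAAAA";	/* freshly read memory */
  unsigned char line[8] = "BBBBBBBB";	/* cached line */
  unsigned char bits = 0xa0;		/* bytes 0 and 2 are dirty */

  merge_dirty (buf, line, &bits, 8);
  printf ("%.8s\n", buf);		/* prints BABAAAAA */
  return 0;
}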
+void
+__cache_evict (__ea void *ea)
+{
+ addr tag = (addr) ea & ~TAG_MASK;
+ struct __cache_tag_array *entry = GET_ENTRY (ea);
+ int i = 0;
+
+ /* Cycle through all the ways the address could occupy and evict
+ any way whose tag matches. */
+
+ for (i = 0; i < WAYS; i++)
+ if (CHECK_TAG (entry, i, tag))
+ __cache_evict_entry (entry, i);
+}
+
+static void *
+__cache_fill (int way, addr tag)
+{
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Reserve our DMA tag. */
+ if (dma_tag == 32)
+ dma_tag = mfc_tag_reserve ();
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (tag, way);
+ mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+
+ return (void *) line;
+}
+
+static void
+__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
+{
+
+ addr tag = (addr) ea & ~TAG_MASK;
+ unsigned int lru = 0;
+ int i = 0;
+ int idx = 0;
+
+ /* If way >= 4, then there are no empty slots, so we must evict
+ the least recently used entry. */
+ if (way >= 4)
+ {
+ for (i = 0; i < WAYS; i++)
+ {
+ if (GET_LRU (entry, i) > lru)
+ {
+ lru = GET_LRU (entry, i);
+ idx = i;
+ }
+ }
+ __cache_evict_entry (entry, idx);
+ way = idx;
+ }
+
+ /* Set the empty entry's tag and fill its cache line. */
+
+ SET_TAG (entry, way, tag);
+ entry->reserved[way] = 0;
+
+ /* Check if the address is just an effective address within the
+ SPU's local store. */
+
+ /* Because the LS is not 256k-aligned, we can't simply AND with a
+ mask here to compare, so we must check the whole range. */
+
+ if ((addr) ea >= (addr) __ea_local_store
+ && (addr) ea < (addr) (__ea_local_store + 0x40000))
+ {
+ SET_IS_LS (entry, way);
+ entry->base[way] =
+ (void *) ((unsigned int) ((addr) ea -
+ (addr) __ea_local_store) & ~0x7f);
+ }
+ else
+ {
+ entry->base[way] = __cache_fill (way, tag);
+ }
+}
+
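Victim selection is a plain max scan over the per-way counters (GET_LRU strips the LS flag first): counters are reset to zero on a hit and incremented on every access to the set, so the largest counter marks the least recently used way. A scalar sketch with hypothetical counters:

#include <stdio.h>

/* Return the index of the way with the largest LRU counter.  */
static int
pick_victim (const int lru[4])
{
  int i, idx = 0;
  for (i = 1; i < 4; i++)
    if (lru[i] > lru[idx])
      idx = i;
  return idx;
}

int
main (void)
{
  int lru[4] = { 3, 7, 0, 5 };	/* hypothetical counters for one set */
  printf ("evict way %d\n", pick_victim (lru));	/* prints 1 */
  return 0;
}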
+void *
+__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
+{
+#ifdef __EA64__
+ unsigned int tag_hi;
+ qword etag_hi;
+#endif
+ unsigned int tag_lo;
+ struct __cache_tag_array *entry;
+
+ qword etag_lo;
+ qword equal;
+ qword bit_mask;
+ qword way;
+
+ /* In this first chunk we merely set up the entry pointer and the tag. */
+
+ entry = GET_ENTRY (ea);
+
+#ifndef __EA64__
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_uint ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
+#else
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));
+
+ tag_hi =
+ si_to_uint (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)));
+#endif
+
+ /* Increment LRU in reserved bytes. */
+ si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
+ si_from_ptr (entry), 48);
+
+missreturn:
+ /* Check if the entry's lo_tag is equal to the address' lo_tag. */
+ etag_lo = si_lqd (si_from_ptr (entry), 0);
+ equal = si_ceq (etag_lo, si_from_uint (tag_lo));
+#ifdef __EA64__
+ /* And the high tag too. */
+ etag_hi = si_lqd (si_from_ptr (entry), 16);
+ equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
+#endif
+
+ if ((si_to_uint (si_orx (equal)) == 0))
+ goto misshandler;
+
+ if (n_bytes_dirty)
+ {
+ /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
+ offset of the appropriate dirty bits. */
+ way = si_shli (si_clz (si_gbb (equal)), 2);
+
+ /* To create the bit_mask, we set it to all 1s (uint -1), then we
+ shift it over (128 - n_bytes_dirty) times. */
+
+ bit_mask = si_from_uint (-1);
+
+ bit_mask =
+ si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));
+
+ bit_mask =
+ si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));
+
+ /* Rotate it around to the correct offset. */
+ bit_mask =
+ si_rotqby (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));
+
+ bit_mask =
+ si_rotqbi (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));
+
+ /* Update the dirty bits. */
+ si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
+ si_from_ptr (entry), way);
+ }
+
+ /* We've definitely found the right entry; set its LRU (reserved)
+ counter to 0 while maintaining the LS flag (MSB). */
+
+ si_stqd (si_andc
+ (si_lqd (si_from_ptr (entry), 48),
+ si_and (equal, si_from_uint (~(LS_FLAG)))),
+ si_from_ptr (entry), 48);
+
+ return (void *)
+ si_to_uint (si_a
+ (si_orx
+ (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
+ si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));
+
+misshandler:
+ equal = si_ceqi (etag_lo, 1);
+ __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
+ goto missreturn;
+}
+
+void *
+__cache_fetch (__ea void *ea)
+{
+ return __cache_fetch_dirty (ea, 0);
+}
+
+void
+__cache_touch (__ea void *ea __attribute__ ((unused)))
+{
+ /* NO-OP for now. */
+}
+
+void __cache_flush (void) __attribute__ ((destructor));
+void
+__cache_flush (void)
+{
+ struct __cache_tag_array *entry = __cache_tag_array;
+ unsigned int i;
+ int j;
+
+ /* Cycle through each cache entry and evict all used ways. */
+
+ for (i = 0; i < CACHE_LINES / WAYS; i++)
+ {
+ for (j = 0; j < WAYS; j++)
+ if (!CHECK_EMPTY (entry, j))
+ __cache_evict_entry (entry, j);
+
+ entry++;
+ }
+}
diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c
index fbbbf32e157..380af402c48 100644
--- a/gcc/config/spu/spu-c.c
+++ b/gcc/config/spu/spu-c.c
@@ -201,6 +201,17 @@ spu_cpu_cpp_builtins (struct cpp_reader *pfile)
if (spu_arch == PROCESSOR_CELLEDP)
builtin_define_std ("__SPU_EDP__");
builtin_define_std ("__vector=__attribute__((__spu_vector__))");
+ switch (spu_ea_model)
+ {
+ case 32:
+ builtin_define_std ("__EA32__");
+ break;
+ case 64:
+ builtin_define_std ("__EA64__");
+ break;
+ default:
+ gcc_unreachable ();
+ }
if (!flag_iso)
{
diff --git a/gcc/config/spu/spu-elf.h b/gcc/config/spu/spu-elf.h
index 532313119cb..68982002103 100644
--- a/gcc/config/spu/spu-elf.h
+++ b/gcc/config/spu/spu-elf.h
@@ -68,8 +68,14 @@
#define LINK_SPEC "%{mlarge-mem: --defsym __stack=0xfffffff0 }"
-#define LIB_SPEC \
- "-( %{!shared:%{g*:-lg}} -lc -lgloss -)"
+#define LIB_SPEC "-( %{!shared:%{g*:-lg}} -lc -lgloss -) \
+ %{mno-atomic-updates:-lgcc_cachemgr_nonatomic; :-lgcc_cachemgr} \
+ %{mcache-size=128:-lgcc_cache128k; \
+ mcache-size=64 :-lgcc_cache64k; \
+ mcache-size=32 :-lgcc_cache32k; \
+ mcache-size=16 :-lgcc_cache16k; \
+ mcache-size=8 :-lgcc_cache8k; \
+ :-lgcc_cache64k}"
/* Turn off warnings in the assembler too. */
#undef ASM_SPEC
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 316cc73d777..2888da67281 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -150,9 +150,12 @@ char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static void spu_init_builtins (void);
+static tree spu_builtin_decl (unsigned, bool);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
+ bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
@@ -202,15 +205,23 @@ static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
+ addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
+static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
+static enum machine_mode spu_addr_space_address_mode (addr_space_t);
+static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
+static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
+static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
+static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
@@ -269,6 +280,10 @@ spu_libgcc_cmp_return_mode (void);
static enum machine_mode
spu_libgcc_shift_count_mode (void);
+
+/* Pointer mode for __ea references. */
+#define EAmode (spu_ea_model != 32 ? DImode : SImode)
+
/* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
@@ -281,8 +296,29 @@ static const struct attribute_spec spu_attribute_table[] =
/* TARGET overrides. */
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
+
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
+
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+ spu_addr_space_legitimate_address_p
+
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
+
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL spu_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin
@@ -293,6 +329,15 @@ static const struct attribute_spec spu_attribute_table[] =
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
+/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
+ and .quad for the debugger. When it is known that the assembler is fixed,
+ these can be removed. */
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
/* The .8byte directive doesn't seem to work well for a 32 bit
architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
@@ -409,6 +454,12 @@ static const struct attribute_spec spu_attribute_table[] =
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION spu_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
+
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
@@ -2365,7 +2416,7 @@ get_branch_target (rtx branch)
return 0;
/* ASM GOTOs. */
- if (GET_CODE (PATTERN (branch)) == ASM_OPERANDS)
+ if (extract_asm_operands (PATTERN (branch)) != NULL)
return NULL;
set = single_set (branch);
@@ -3610,6 +3661,29 @@ exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
return FALSE;
}
+/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
+
+static int
+ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+ tree decl;
+
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ rtx plus = XEXP (x, 0);
+ rtx op0 = XEXP (plus, 0);
+ rtx op1 = XEXP (plus, 1);
+ if (GET_CODE (op1) == CONST_INT)
+ x = op0;
+ }
+
+ return (GET_CODE (x) == SYMBOL_REF
+ && (decl = SYMBOL_REF_DECL (x)) != 0
+ && TREE_CODE (decl) == VAR_DECL
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
+}
+
/* We accept:
- any 32-bit constant (SImode, SFmode)
- any constant that can be generated with fsmbi (any mode)
@@ -3621,6 +3695,12 @@ spu_legitimate_constant_p (rtx x)
{
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
+
+ /* Reject any __ea qualified reference. These can't appear in
+ instructions but must be forced to the constant pool. */
+ if (for_each_rtx (&x, ea_symbol_ref, 0))
+ return 0;
+
/* V4SI with all identical symbols is valid. */
if (!flag_pic
&& GET_MODE (x) == V4SImode
@@ -3659,8 +3739,14 @@ spu_legitimate_address_p (enum machine_mode mode,
switch (GET_CODE (x))
{
case LABEL_REF:
+ return !TARGET_LARGE_MEM;
+
case SYMBOL_REF:
case CONST:
+ /* Keep __ea references until reload so that spu_expand_mov can see them
+ in MEMs. */
+ if (ea_symbol_ref (&x, 0))
+ return !reload_in_progress && !reload_completed;
return !TARGET_LARGE_MEM;
case CONST_INT:
@@ -3704,6 +3790,20 @@ spu_legitimate_address_p (enum machine_mode mode,
return FALSE;
}
+/* Like spu_legitimate_address_p, except with named addresses. */
+static bool
+spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool reg_ok_strict, addr_space_t as)
+{
+ if (as == ADDR_SPACE_EA)
+ return (REG_P (x) && (GET_MODE (x) == EAmode));
+
+ else if (as != ADDR_SPACE_GENERIC)
+ gcc_unreachable ();
+
+ return spu_legitimate_address_p (mode, x, reg_ok_strict);
+}
+
/* When the address is reg + const_int, force the const_int into a
register. */
rtx
@@ -3735,6 +3835,17 @@ spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
return x;
}
+/* Like spu_legitimate_address, except with named address support. */
+static rtx
+spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
+ addr_space_t as)
+{
+ if (as != ADDR_SPACE_GENERIC)
+ return x;
+
+ return spu_legitimize_address (x, oldx, mode);
+}
+
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
struct attribute_spec.handler. */
static tree
@@ -4238,6 +4349,233 @@ address_needs_split (rtx mem)
return 0;
}
+static GTY(()) rtx cache_fetch; /* __cache_fetch function */
+static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
+static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
+
+/* MEM is known to be an __ea qualified memory access. Emit a call to
+ fetch the ppu memory to local store, and return its address in local
+ store. */
+
+static void
+ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ if (is_store)
+ {
+ rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
+ if (!cache_fetch_dirty)
+ cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
+ emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
+ 2, ea_addr, EAmode, ndirty, SImode);
+ }
+ else
+ {
+ if (!cache_fetch)
+ cache_fetch = init_one_libfunc ("__cache_fetch");
+ emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
+ 1, ea_addr, EAmode);
+ }
+}
+
+/* Like ea_load_store, but do the cache tag comparison and, for stores,
+ dirty bit marking, inline.
+
+ The cache control data structure is an array of
+
+ struct __cache_tag_array
+ {
+ unsigned int tag_lo[4];
+ unsigned int tag_hi[4];
+ void *data_pointer[4];
+ int reserved[4];
+ vector unsigned short dirty_bits[4];
+ } */
+
+static void
+ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ rtx ea_addr_si;
+ HOST_WIDE_INT v;
+ rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
+ rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
+ rtx index_mask = gen_reg_rtx (SImode);
+ rtx tag_arr = gen_reg_rtx (Pmode);
+ rtx splat_mask = gen_reg_rtx (TImode);
+ rtx splat = gen_reg_rtx (V4SImode);
+ rtx splat_hi = NULL_RTX;
+ rtx tag_index = gen_reg_rtx (Pmode);
+ rtx block_off = gen_reg_rtx (SImode);
+ rtx tag_addr = gen_reg_rtx (Pmode);
+ rtx tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag_hi = NULL_RTX;
+ rtx cache_ptrs = gen_reg_rtx (TImode);
+ rtx cache_ptrs_si = gen_reg_rtx (SImode);
+ rtx tag_equal = gen_reg_rtx (V4SImode);
+ rtx tag_equal_hi = NULL_RTX;
+ rtx tag_eq_pack = gen_reg_rtx (V4SImode);
+ rtx tag_eq_pack_si = gen_reg_rtx (SImode);
+ rtx eq_index = gen_reg_rtx (SImode);
+ rtx bcomp, hit_label, hit_ref, cont_label, insn;
+
+ if (spu_ea_model != 32)
+ {
+ splat_hi = gen_reg_rtx (V4SImode);
+ cache_tag_hi = gen_reg_rtx (V4SImode);
+ tag_equal_hi = gen_reg_rtx (V4SImode);
+ }
+
+ emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
+ emit_move_insn (tag_arr, tag_arr_sym);
+ v = 0x0001020300010203LL;
+ emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
+ ea_addr_si = ea_addr;
+ if (spu_ea_model != 32)
+ ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
+
+ /* tag_index = ea_addr & (tag_array_size - 128) */
+ emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
+
+ /* splat ea_addr to all 4 slots. */
+ emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
+ /* Similarly for high 32 bits of ea_addr. */
+ if (spu_ea_model != 32)
+ emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
+
+ /* block_off = ea_addr & 127 */
+ emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
+
+ /* tag_addr = tag_arr + tag_index */
+ emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
+
+ /* Read cache tags. */
+ emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
+ if (spu_ea_model != 32)
+ emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
+ plus_constant (tag_addr, 16)));
+
+ /* tag = ea_addr & -128 */
+ emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
+
+ /* Read all four cache data pointers. */
+ emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
+ plus_constant (tag_addr, 32)));
+
+ /* Compare tags. */
+ emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
+ if (spu_ea_model != 32)
+ {
+ emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
+ emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
+ }
+
+ /* At most one of the tags compare equal, so tag_equal has one
+ 32-bit slot set to all 1's, with the other slots all zero.
+ gbb picks off low bit from each byte in the 128-bit registers,
+ so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
+ we have a hit. */
+ emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
+ emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
+
+ /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
+ emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
+
+ /* Allowing us to rotate the corresponding cache data pointer to slot0.
+ (rotating eq_index mod 16 bytes). */
+ emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
+ emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
+
+ /* Add block offset to form final data address. */
+ emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
+
+ /* Check that we did hit. */
+ hit_label = gen_label_rtx ();
+ hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
+ bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ hit_ref, pc_rtx)));
+ /* Say that this branch is very likely to happen. */
+ v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
+
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ cont_label = gen_label_rtx ();
+ emit_jump_insn (gen_jump (cont_label));
+ emit_barrier ();
+
+ emit_label (hit_label);
+
+ if (is_store)
+ {
+ HOST_WIDE_INT v_hi;
+ rtx dirty_bits = gen_reg_rtx (TImode);
+ rtx dirty_off = gen_reg_rtx (SImode);
+ rtx dirty_128 = gen_reg_rtx (TImode);
+ rtx neg_block_off = gen_reg_rtx (SImode);
+
+ /* Set up mask with one dirty bit per byte of the mem we are
+ writing, starting from top bit. */
+ v_hi = v = -1;
+ v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
+ if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
+ {
+ v_hi = v;
+ v = 0;
+ }
+ emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
+
+ /* Form index into cache dirty_bits. eq_index is one of
+ 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
+ 0x40, 0x50, 0x60 or 0x70 which just happens to be the
+ offset to each of the four dirty_bits elements. */
+ emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
+
+ emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
+
+ /* Rotate bit mask to proper bit. */
+ emit_insn (gen_negsi2 (neg_block_off, block_off));
+ emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
+ emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
+
+ /* Or in the new dirty bits. */
+ emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
+
+ /* Store. */
+ emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
+ }
+
+ emit_label (cont_label);
+}
+
+static rtx
+expand_ea_mem (rtx mem, bool is_store)
+{
+ rtx ea_addr;
+ rtx data_addr = gen_reg_rtx (Pmode);
+ rtx new_mem;
+
+ ea_addr = force_reg (EAmode, XEXP (mem, 0));
+ if (optimize_size || optimize == 0)
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ else
+ ea_load_store_inline (mem, is_store, ea_addr, data_addr);
+
+ if (ea_alias_set == -1)
+ ea_alias_set = new_alias_set ();
+
+ /* We generate a new MEM RTX to refer to the copy of the data
+ in the cache. We do not copy memory attributes (except the
+ alignment) from the original MEM, as they may no longer apply
+ to the cache copy. */
+ new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
+ set_mem_alias_set (new_mem, ea_alias_set);
+ set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
+
+ return new_mem;
+}
+
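The RTL emitted above derives three values from the effective address; a scalar sketch of the same arithmetic (the tag-array size and address are made up, and in the real code the size comes from the __cache_tag_array_size symbol):

#include <stdio.h>

#define LINE_SIZE 128

int
main (void)
{
  unsigned int tag_array_size = 8192;	/* hypothetical: 64 sets * 128 */
  unsigned int ea = 0x12345678;		/* hypothetical 32-bit EA */

  unsigned int block_off = ea & (LINE_SIZE - 1);	/* offset in line */
  unsigned int tag = ea & -LINE_SIZE;			/* line base */
  unsigned int set_off = ea & (tag_array_size - LINE_SIZE); /* tag_index */

  printf ("off=%#x tag=%#x set_off=%#x\n", block_off, tag, set_off);
  return 0;
}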
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
@@ -4295,9 +4633,17 @@ spu_expand_mov (rtx * ops, enum machine_mode mode)
}
}
if (MEM_P (ops[0]))
- return spu_split_store (ops);
+ {
+ if (MEM_ADDR_SPACE (ops[0]))
+ ops[0] = expand_ea_mem (ops[0], true);
+ return spu_split_store (ops);
+ }
if (MEM_P (ops[1]))
- return spu_split_load (ops);
+ {
+ if (MEM_ADDR_SPACE (ops[1]))
+ ops[1] = expand_ea_mem (ops[1], false);
+ return spu_split_load (ops);
+ }
return 0;
}
@@ -5285,6 +5631,18 @@ struct spu_builtin_description spu_builtins[] = {
#undef DEF_BUILTIN
};
+/* Return the SPU builtin decl for CODE. */
+
+static tree
+spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= NUM_SPU_BUILTINS)
+ return error_mark_node;
+
+ return spu_builtins[code].fndecl;
+}
+
+
static void
spu_init_builtins (void)
{
@@ -6427,6 +6785,113 @@ spu_builtin_vec_perm (tree type, tree *mask_element_type)
return d->fndecl;
}
+/* Return the appropriate mode for a named address pointer. */
+static enum machine_mode
+spu_addr_space_pointer_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return ptr_mode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the appropriate mode for a named address address. */
+static enum machine_mode
+spu_addr_space_address_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return Pmode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Determine if one named address space is a subset of another. */
+
+static bool
+spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+{
+ gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
+ gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
+
+ if (subset == superset)
+ return true;
+
+ /* If we have -mno-address-space-conversion, treat __ea and generic as not
+ being subsets but instead as disjoint address spaces. */
+ else if (!TARGET_ADDRESS_SPACE_CONVERSION)
+ return false;
+
+ else
+ return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
+}
+
+/* Convert from one address space to another. */
+static rtx
+spu_addr_space_convert (rtx op, tree from_type, tree to_type)
+{
+ addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
+ addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
+
+ gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
+ gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
+
+ if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
+ {
+ rtx result, ls;
+
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (Pmode);
+ ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
+ op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, Pmode, 1);
+
+ emit_insn (gen_subsi3 (result, op, ls));
+
+ return result;
+ }
+
+ else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
+ {
+ rtx result, ls;
+
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (EAmode);
+ ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
+ op = force_reg (Pmode, op);
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, EAmode, 1);
+ op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
+
+ if (EAmode == SImode)
+ emit_insn (gen_addsi3 (result, op, ls));
+ else
+ emit_insn (gen_adddi3 (result, op, ls));
+
+ return result;
+ }
+
+ else
+ gcc_unreachable ();
+}
+
+
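The two conversion directions reduce to pointer arithmetic against __ea_local_store, with NULL mapped to NULL by the conditional move; a hosted scalar sketch of that contract (the local-store address here is hypothetical):

#include <stdint.h>
#include <stdio.h>

static const uint64_t ea_local_store = 0x20000000ULL;	/* hypothetical */

/* __ea -> generic: subtract the local-store base; NULL stays NULL.  */
static uint32_t
ea_to_generic (uint64_t ea)
{
  return ea ? (uint32_t) (ea - ea_local_store) : 0;
}

/* generic -> __ea: add the local-store base; NULL stays NULL.  */
static uint64_t
generic_to_ea (uint32_t ls)
{
  return ls ? ea_local_store + ls : 0;
}

int
main (void)
{
  uint32_t p = 0x1000;	/* hypothetical local-store pointer */
  uint64_t ea = generic_to_ea (p);
  printf ("ea=%#llx back=%#x\n",
	  (unsigned long long) ea, ea_to_generic (ea));
  return 0;
}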
/* Count the total number of instructions in each pipe and return the
maximum, which is used as the Minimum Iteration Interval (MII)
in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
@@ -6519,9 +6984,46 @@ spu_section_type_flags (tree decl, const char *name, int reloc)
/* .toe needs to have type @nobits. */
if (strcmp (name, ".toe") == 0)
return SECTION_BSS;
+ /* Don't load _ea into the current address space. */
+ if (strcmp (name, "._ea") == 0)
+ return SECTION_WRITE | SECTION_DEBUG;
return default_section_type_flags (decl, name, reloc);
}
+/* Implement targetm.select_section. */
+static section *
+spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ /* Variables and constants defined in the __ea address space
+ go into a special section named "._ea". */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
+
+ return get_named_section (decl, "._ea", reloc);
+ }
+
+ return default_elf_select_section (decl, reloc, align);
+}
+
+/* Implement targetm.unique_section. */
+static void
+spu_unique_section (tree decl, int reloc)
+{
+ /* We don't support unique section names in the __ea address
+ space for now. */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
+ return;
+
+ default_unique_section (decl, reloc);
+}
+
/* Generate a constant or register which contains 2^SCALE. We assume
the result is valid for MODE. Currently, MODE must be V4SFmode and
SCALE must be SImode. */
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index 67011a62126..369e6d76e9d 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -51,7 +51,7 @@ extern GTY(()) int spu_tune;
/* Default target_flags if no switches specified. */
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS \
- | MASK_SAFE_HINTS)
+ | MASK_SAFE_HINTS | MASK_ADDRESS_SPACE_CONVERSION)
#endif
@@ -469,6 +469,17 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
#define ASM_OUTPUT_LABELREF(FILE, NAME) \
asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME))
+#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \
+ do \
+ { \
+ tree decl; \
+ assemble_name (FILE, XSTR ((X), 0)); \
+ if ((decl = SYMBOL_REF_DECL ((X))) != 0 \
+ && TREE_CODE (decl) == VAR_DECL \
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl))) \
+ fputs ("@ppu", FILE); \
+ } while (0)
+
/* Instruction Output */
#define REGISTER_NAMES \
@@ -590,6 +601,13 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
} while (0)
+/* Address spaces. */
+#define ADDR_SPACE_EA 1
+
+/* Named address space keywords. */
+#define TARGET_ADDR_SPACE_KEYWORDS ADDR_SPACE_KEYWORD ("__ea", ADDR_SPACE_EA)
+
+
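With the keyword registered, user code can qualify PPU-resident objects directly; an SPU-target-only sketch (shared_counter is a hypothetical variable, and the accesses are expanded through the software cache by spu_expand_mov):

__ea int shared_counter;

void
bump_shared_counter (void)
{
  /* Both the load and the store go through the software cache.  */
  shared_counter++;
}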
/* Builtins. */
enum spu_builtin_type
diff --git a/gcc/config/spu/spu.opt b/gcc/config/spu/spu.opt
index 1589199b60b..4ad7128de51 100644
--- a/gcc/config/spu/spu.opt
+++ b/gcc/config/spu/spu.opt
@@ -82,3 +82,24 @@ Generate code for given CPU
mtune=
Target RejectNegative Joined Var(spu_tune_string)
Schedule code for given CPU
+
+mea32
+Target Report RejectNegative Var(spu_ea_model,32) Init(32)
+Access variables in 32-bit PPU objects (default)
+
+mea64
+Target Report RejectNegative Var(spu_ea_model,64) VarExists
+Access variables in 64-bit PPU objects
+
+maddress-space-conversion
+Target Report Mask(ADDRESS_SPACE_CONVERSION)
+Allow conversions between __ea and generic pointers (default)
+
+mcache-size=
+Target Report RejectNegative Joined UInteger
+Size (in KB) of software data cache
+
+matomic-updates
+Target Report
+Atomically write back software data cache lines (default)
+
diff --git a/gcc/config/spu/spu_cache.h b/gcc/config/spu/spu_cache.h
new file mode 100644
index 00000000000..66a679be5a0
--- /dev/null
+++ b/gcc/config/spu/spu_cache.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU_CACHE_H
+#define _SPU_CACHE_H
+
+void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty);
+void *__cache_fetch (__ea void *ea);
+void __cache_evict (__ea void *ea);
+void __cache_flush (void);
+void __cache_touch (__ea void *ea);
+
+#define cache_fetch_dirty(_ea, _n_bytes_dirty) \
+ __cache_fetch_dirty(_ea, _n_bytes_dirty)
+
+#define cache_fetch(_ea) __cache_fetch(_ea)
+#define cache_touch(_ea) __cache_touch(_ea)
+#define cache_evict(_ea) __cache_evict(_ea)
+#define cache_flush() __cache_flush()
+
+#endif
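A hypothetical use of this API from SPU code, assuming buf points into PPU memory and the accessed int lies within a single 128-byte cache line:

#include <spu_cache.h>

int
read_and_bump (__ea int *buf)
{
  int *p = cache_fetch (buf);	/* local-store copy of buf's line */
  int val = *p;

  /* Refetch with the int's bytes marked dirty, then store.  */
  int *q = cache_fetch_dirty (buf, sizeof (int));
  *q = val + 1;

  cache_flush ();		/* write all dirty lines back to PPU memory */
  return val;
}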
diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf
index 0c9236fa89f..a54ede9fa25 100644
--- a/gcc/config/spu/t-spu-elf
+++ b/gcc/config/spu/t-spu-elf
@@ -66,14 +66,39 @@ fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/spu/t-spu-elf
# Don't let CTOR_LIST end up in sdata section.
CRTSTUFF_T_CFLAGS =
-#MULTILIB_OPTIONS=mlarge-mem/mtest-abi
-#MULTILIB_DIRNAMES=large-mem test-abi
-#MULTILIB_MATCHES=
+# Multi-lib support.
+MULTILIB_OPTIONS=mea64
# Neither gcc or newlib seem to have a standard way to generate multiple
# crt*.o files. So we don't use the standard crt0.o name anymore.
-EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o libgcc_cachemgr.a libgcc_cachemgr_nonatomic.a \
+ libgcc_cache8k.a libgcc_cache16k.a libgcc_cache32k.a libgcc_cache64k.a libgcc_cache128k.a
+
+$(T)cachemgr.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -c $< -o $@
+
+# Specialised rule to add a -D flag.
+$(T)cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -DNONATOMIC -c $< -o $@
+
+$(T)libgcc_%.a: $(T)%.o
+ $(AR_FOR_TARGET) -rcs $@ $<
+
+$(T)cache8k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=8 -o $@ -c $<
+
+$(T)cache16k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=16 -o $@ -c $<
+
+$(T)cache32k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=32 -o $@ -c $<
+
+$(T)cache64k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=64 -o $@ -c $<
+
+$(T)cache128k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=128 -o $@ -c $<
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h
index 682f7e6f466..fa97e8becdc 100644
--- a/gcc/config/stormy16/stormy16.h
+++ b/gcc/config/stormy16/stormy16.h
@@ -522,8 +522,6 @@ enum reg_class
#define HAVE_PRE_DECREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#ifdef REG_OK_STRICT
diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h
index 1087069adbb..dccbe9cc8ee 100644
--- a/gcc/config/vax/linux.h
+++ b/gcc/config/vax/linux.h
@@ -21,17 +21,7 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_VERSION
#define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)");
-#define TARGET_OS_CPP_BUILTINS() \
- do \
- { \
- LINUX_TARGET_OS_CPP_BUILTINS(); \
- if (flag_pic) \
- { \
- builtin_define ("__PIC__"); \
- builtin_define ("__pic__"); \
- } \
- } \
- while (0)
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
/* We use GAS, G-float double and want new DI patterns. */
#undef TARGET_DEFAULT