| author    | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-11-06 22:51:05 +0000 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-11-06 22:51:05 +0000 |
| commit    | e1647522f93999450cc558341bb2066ca26e070f (patch)         |                           |
| tree      | ec9704394836b7bb5123d7d8c1d9647eace77c5d /gcc/config     |                           |
| parent    | 035ef3e66f39f67a3fab95825e0fbc750bc8160d (diff)          |                           |
| download  | gcc-e1647522f93999450cc558341bb2066ca26e070f.tar.gz      |                           |
2009-11-06 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 153975
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@153981 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
51 files changed, 4036 insertions, 542 deletions
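The bulk of the ARM changes below replace the old `fputype` enum pair (`arm_fpu_arch`/`arm_fpu_tune`) with a single table of `arm_fpu_desc` records that `arm_override_options` looks up by the `-mfpu=` name. A minimal, self-contained sketch of that lookup pattern — with simplified field types and a hypothetical `find_fpu` helper; the real table and loop are in the `gcc/config/arm/arm.c` hunks below:

```c
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for GCC's arm_fpu_desc; the real struct (see the
   gcc/config/arm/arm.h hunk below) uses enum arm_fp_model and enum
   vfp_reg_type for some of these fields.  */
struct arm_fpu_desc
{
  const char *name;   /* -mfpu= argument */
  int rev;            /* architecture revision, e.g. VFPv3 -> 3 */
  int d32;            /* 1 if the full 32-entry D register bank */
  int neon;           /* 1 if NEON is available */
  int fp16;           /* 1 if half-precision extensions are available */
};

static const struct arm_fpu_desc all_fpus[] =
{
  {"vfp",       2, 0, 0, 0},
  {"vfpv3",     3, 1, 0, 0},
  {"vfpv3-d16", 3, 0, 0, 0},
  {"neon",      3, 1, 1, 0},
  {"neon-fp16", 3, 1, 1, 1},
  {"vfp3",      3, 1, 0, 0},  /* compatibility alias for vfpv3 */
};

/* Hypothetical helper mirroring the lookup loop the patch adds to
   arm_override_options: scan the table for the -mfpu= name.  */
static const struct arm_fpu_desc *
find_fpu (const char *name)
{
  size_t i;
  for (i = 0; i < sizeof all_fpus / sizeof all_fpus[0]; i++)
    if (strcmp (all_fpus[i].name, name) == 0)
      return &all_fpus[i];
  return NULL;  /* the real code then errors: "invalid floating point option" */
}

int
main (void)
{
  const struct arm_fpu_desc *d = find_fpu ("neon-fp16");
  if (d)
    printf ("%s: VFP rev %d, %s, neon=%d, fp16=%d\n",
            d->name, d->rev, d->d32 ? "D32" : "D16", d->neon, d->fp16);
  return 0;
}
```

With the descriptor in hand, predicates such as `TARGET_VFP3` or `TARGET_NEON` become simple field tests on `arm_fpu_desc` instead of enumerations of `FPUTYPE_*` cases, which is why the `arm.h` hunks below shrink so much.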
diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h
index 2b5165c0754..81c12aa14fc 100644
--- a/gcc/config/alpha/osf.h
+++ b/gcc/config/alpha/osf.h
@@ -167,10 +167,6 @@ __enable_execute_stack (void *addr) \
 #define LD_INIT_SWITCH "-init"
 #define LD_FINI_SWITCH "-fini"
 
-/* The linker needs a space after "-o".  This allows -oldstyle_liblookup to
-   be passed to ld.  */
-#define SWITCHES_NEED_SPACES "o"
-
 /* Select a format to encode pointers in exception handling data.  CODE
    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
    true if the symbol may be affected by dynamic relocations.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 25e433cd3a4..4c7fcb65854 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -524,14 +524,11 @@ enum processor_type arm_tune = arm_none;
 /* The default processor used if not overridden by commandline.  */
 static enum processor_type arm_default_cpu = arm_none;
 
-/* Which floating point model to use.  */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available.  */
-enum fputype arm_fpu_arch;
-
 /* Which floating point hardware to schedule for.  */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use.  */
+const struct arm_fpu_desc *arm_fpu_desc;
 
 /* Whether to use floating point hardware.  */
 enum float_abi_type arm_float_abi;
@@ -809,46 +806,21 @@ static struct arm_cpu_select arm_select[] =
 
 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
 
-struct fpu_desc
-{
-  const char * name;
-  enum fputype fpu;
-};
-
-
 /* Available values for -mfpu=.  */
-static const struct fpu_desc all_fpus[] =
-{
-  {"fpa",       FPUTYPE_FPA},
-  {"fpe2",      FPUTYPE_FPA_EMU2},
-  {"fpe3",      FPUTYPE_FPA_EMU2},
-  {"maverick",  FPUTYPE_MAVERICK},
-  {"vfp",       FPUTYPE_VFP},
-  {"vfp3",      FPUTYPE_VFP3},
-  {"vfpv3",     FPUTYPE_VFP3},
-  {"vfpv3-d16", FPUTYPE_VFP3D16},
-  {"neon",      FPUTYPE_NEON},
-  {"neon-fp16", FPUTYPE_NEON_FP16}
-};
-
-
-/* Floating point models used by the different hardware.
-   See fputype in arm.h.  */
-
-static const enum arm_fp_model fp_model_for_fpu[] =
-{
-  /* No FP hardware.  */
-  ARM_FP_MODEL_UNKNOWN,   /* FPUTYPE_NONE  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU2  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU3  */
-  ARM_FP_MODEL_MAVERICK,  /* FPUTYPE_MAVERICK  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3D16  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_NEON  */
-  ARM_FP_MODEL_VFP        /* FPUTYPE_NEON_FP16  */
+static const struct arm_fpu_desc all_fpus[] =
+{
+  {"fpa",       ARM_FP_MODEL_FPA, 0, 0, false, false},
+  {"fpe2",      ARM_FP_MODEL_FPA, 2, 0, false, false},
+  {"fpe3",      ARM_FP_MODEL_FPA, 3, 0, false, false},
+  {"maverick",  ARM_FP_MODEL_MAVERICK, 0, 0, false, false},
+  {"vfp",       ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+  {"vfpv3",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+  {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+  {"neon",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+  {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+  /* Compatibility aliases.  */
+  {"vfp3",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
 };
@@ -1615,7 +1587,6 @@ arm_override_options (void)
   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
     error ("iwmmxt abi requires an iwmmxt capable cpu");
 
-  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
   if (target_fpu_name == NULL && target_fpe_name != NULL)
     {
       if (streq (target_fpe_name, "2"))
@@ -1626,46 +1597,52 @@ arm_override_options (void)
         error ("invalid floating point emulation option: -mfpe=%s",
                target_fpe_name);
     }
 
-  if (target_fpu_name != NULL)
-    {
-      /* The user specified a FPU.  */
-      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
-        {
-          if (streq (all_fpus[i].name, target_fpu_name))
-            {
-              arm_fpu_arch = all_fpus[i].fpu;
-              arm_fpu_tune = arm_fpu_arch;
-              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
-              break;
-            }
-        }
-      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
-        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
-    }
-  else
+
+  if (target_fpu_name == NULL)
     {
 #ifdef FPUTYPE_DEFAULT
-      /* Use the default if it is specified for this platform.  */
-      arm_fpu_arch = FPUTYPE_DEFAULT;
-      arm_fpu_tune = FPUTYPE_DEFAULT;
+      target_fpu_name = FPUTYPE_DEFAULT;
 #else
-      /* Pick one based on CPU type.  */
-      /* ??? Some targets assume FPA is the default.
-         if ((insn_flags & FL_VFP) != 0)
-           arm_fpu_arch = FPUTYPE_VFP;
-         else
-      */
       if (arm_arch_cirrus)
-        arm_fpu_arch = FPUTYPE_MAVERICK;
+        target_fpu_name = "maverick";
       else
-        arm_fpu_arch = FPUTYPE_FPA_EMU2;
+        target_fpu_name = "fpe2";
 #endif
-      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
-        arm_fpu_tune = FPUTYPE_FPA;
+    }
+
+  arm_fpu_desc = NULL;
+  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+    {
+      if (streq (all_fpus[i].name, target_fpu_name))
+        {
+          arm_fpu_desc = &all_fpus[i];
+          break;
+        }
+    }
+  if (!arm_fpu_desc)
+    error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+
+  switch (arm_fpu_desc->model)
+    {
+    case ARM_FP_MODEL_FPA:
+      if (arm_fpu_desc->rev == 2)
+        arm_fpu_attr = FPU_FPE2;
+      else if (arm_fpu_desc->rev == 3)
+        arm_fpu_attr = FPU_FPE3;
       else
-        arm_fpu_tune = arm_fpu_arch;
-      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
-      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+        arm_fpu_attr = FPU_FPA;
+      break;
+
+    case ARM_FP_MODEL_MAVERICK:
+      arm_fpu_attr = FPU_MAVERICK;
+      break;
+
+    case ARM_FP_MODEL_VFP:
+      arm_fpu_attr = FPU_VFP;
+      break;
+
+    default:
+      gcc_unreachable();
     }
 
   if (target_float_abi_name != NULL)
@@ -1687,7 +1664,7 @@ arm_override_options (void)
     arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
 
   if (TARGET_AAPCS_BASED
-      && (arm_fp_model == ARM_FP_MODEL_FPA))
+      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
     error ("FPA is unsupported in the AAPCS");
 
   if (TARGET_AAPCS_BASED)
@@ -1715,7 +1692,7 @@ arm_override_options (void)
   /* If soft-float is specified then don't use FPU.  */
   if (TARGET_SOFT_FLOAT)
-    arm_fpu_attr = FPU_NONE;
+    arm_fpu_attr = FPU_NONE;
 
   if (TARGET_AAPCS_BASED)
     {
@@ -1742,8 +1719,7 @@ arm_override_options (void)
   /* For arm2/3 there is no need to do any scheduling if there is only
      a floating point emulator, or we are doing software floating-point.  */
   if ((TARGET_SOFT_FLOAT
-       || arm_fpu_tune == FPUTYPE_FPA_EMU2
-       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
@@ -13305,7 +13281,7 @@ arm_output_epilogue (rtx sibling)
      /* This variable is for the Virtual Frame Pointer, not VFP regs.
*/ int vfp_offset = offsets->frame; - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -13528,7 +13504,7 @@ arm_output_epilogue (rtx sibling) SP_REGNUM, HARD_FRAME_POINTER_REGNUM); } - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -14254,7 +14230,7 @@ arm_save_coproc_regs(void) /* Save any floating point call-saved registers used by this function. */ - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -19736,45 +19712,8 @@ arm_file_start (void) } else { - int set_float_abi_attributes = 0; - switch (arm_fpu_arch) - { - case FPUTYPE_FPA: - fpu_name = "fpa"; - break; - case FPUTYPE_FPA_EMU2: - fpu_name = "fpe2"; - break; - case FPUTYPE_FPA_EMU3: - fpu_name = "fpe3"; - break; - case FPUTYPE_MAVERICK: - fpu_name = "maverick"; - break; - case FPUTYPE_VFP: - fpu_name = "vfp"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3D16: - fpu_name = "vfpv3-d16"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3: - fpu_name = "vfpv3"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON: - fpu_name = "neon"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON_FP16: - fpu_name = "neon-fp16"; - set_float_abi_attributes = 1; - break; - default: - abort(); - } - if (set_float_abi_attributes) + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) { if (TARGET_HARD_FLOAT) asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 9272ca51cba..2dfd22df45c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -190,9 +190,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) /* Use hardware floating point calling convention. */ #define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) -#define TARGET_FPA (arm_fp_model == ARM_FP_MODEL_FPA) -#define TARGET_MAVERICK (arm_fp_model == ARM_FP_MODEL_MAVERICK) -#define TARGET_VFP (arm_fp_model == ARM_FP_MODEL_VFP) +#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA) +#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) #define TARGET_IWMMXT (arm_arch_iwmmxt) #define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) #define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) @@ -216,6 +216,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) /* Thumb-1 only. */ #define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) +/* FPA emulator without LFM. */ +#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently @@ -223,27 +225,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void); to be more careful with TARGET_NEON as noted below. */ /* FPU is has the full VFPv3/NEON register file of 32 D registers. 
*/ -#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) /* FPU supports VFPv3 instructions. */ -#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3D16 \ - || TARGET_VFPD32)) +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) /* FPU supports NEON/VFP half-precision floating-point. */ -#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16) +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) /* FPU supports Neon instructions. The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ - && arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) + && TARGET_VFP && arm_fpu_desc->neon) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ @@ -300,42 +296,25 @@ enum arm_fp_model ARM_FP_MODEL_VFP }; -extern enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. Also update - fp_model_for_fpu in arm.c when adding entries to this list. */ -enum fputype +enum vfp_reg_type { - /* No FP hardware. */ - FPUTYPE_NONE, - /* Full FPA support. */ - FPUTYPE_FPA, - /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */ - FPUTYPE_FPA_EMU2, - /* Emulated FPA hardware, Issue 3 emulator. */ - FPUTYPE_FPA_EMU3, - /* Cirrus Maverick floating point co-processor. */ - FPUTYPE_MAVERICK, - /* VFP. */ - FPUTYPE_VFP, - /* VFPv3-D16. */ - FPUTYPE_VFP3D16, - /* VFPv3. */ - FPUTYPE_VFP3, - /* Neon. */ - FPUTYPE_NEON, - /* Neon with half-precision float extensions. */ - FPUTYPE_NEON_FP16 + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE }; -/* Recast the floating point class to be the floating point attribute. */ -#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune) - -/* What type of floating point to tune for */ -extern enum fputype arm_fpu_tune; - -/* What type of floating point instructions are available */ -extern enum fputype arm_fpu_arch; +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. */ +extern int arm_fpu_attr; enum float_abi_type { diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index b8bf700242b..52edcbaa17b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -160,7 +160,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). 
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -6770,6 +6770,7 @@ (const_int 6) (const_int 8))))] ) + (define_insn "*movsi_cbranchsi4" [(set (pc) (if_then_else @@ -6833,6 +6834,45 @@ (const_int 10)))))] ) +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 1) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +;; Sigh! This variant shouldn't be needed, but combine often fails to +;; merge cases like this because the op1 is a hard register in +;; CLASS_LIKELY_SPILLED_P. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + (define_insn "*negated_cbranchsi4" [(set (pc) (if_then_else diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index faaaf7bca39..ccfc7426077 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -61,7 +61,7 @@ typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_sf float32_t; +typedef float float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; @@ -5085,7 +5085,7 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5151,7 +5151,7 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5283,7 +5283,7 @@ vdup_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vdup_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5349,7 +5349,7 @@ vdupq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vdupq_n_f32 (float32_t __a) { - return 
(float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5415,7 +5415,7 @@ vmov_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmov_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5481,7 +5481,7 @@ vmovq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmovq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -6591,7 +6591,7 @@ vmul_n_s32 (int32x2_t __a, int32_t __b) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmul_n_f32 (float32x2_t __a, float32_t __b) { - return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3); + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6621,7 +6621,7 @@ vmulq_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmulq_n_f32 (float32x4_t __a, float32_t __b) { - return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3); + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6735,7 +6735,7 @@ vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6765,7 +6765,7 @@ vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6831,7 +6831,7 @@ vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6861,7 +6861,7 @@ vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) @@ -7851,7 +7851,7 @@ vld1_s64 (const int64_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1v2sf (__a); + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -7917,7 +7917,7 @@ vld1q_s64 (const int64_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1v4sf (__a); + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -7977,7 +7977,7 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8043,7 +8043,7 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8109,7 +8109,7 @@ vld1_dup_s32 (const int32_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a); + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8175,7 +8175,7 @@ vld1q_dup_s32 (const int32_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a); + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8247,7 +8247,7 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { - __builtin_neon_vst1v2sf (__a, __b); + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8313,7 +8313,7 @@ vst1q_s64 (int64_t * __a, int64x2_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { - __builtin_neon_vst1v4sf (__a, __b); + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8373,7 +8373,7 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { - __builtin_neon_vst1_lanev2sf (__a, __b, __c); + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf 
*) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8439,7 +8439,7 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { - __builtin_neon_vst1_lanev4sf (__a, __b, __c); + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8512,7 +8512,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2v2sf (__a); + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8600,7 +8600,7 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_f32 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2v4sf (__a); + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8676,7 +8676,7 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8748,7 +8748,7 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8807,7 +8807,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_dup_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_dupv2sf (__a); + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8892,7 +8892,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_f32 (float32_t * __a, float32x2x2_t __b) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2v2sf (__a, __bu.__o); + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8969,7 +8969,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_f32 (float32_t * __a, float32x4x2_t __b) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2v4sf (__a, __bu.__o); + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9032,7 +9032,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9088,7 +9088,7 @@ __extension__ 
static __inline void __attribute__ ((__always_inline__)) vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9140,7 +9140,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3v2sf (__a); + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9228,7 +9228,7 @@ __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_f32 (const float32_t * __a) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3v4sf (__a); + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9304,7 +9304,7 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9376,7 +9376,7 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9435,7 +9435,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_dup_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_dupv2sf (__a); + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9520,7 +9520,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_f32 (float32_t * __a, float32x2x3_t __b) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3v2sf (__a, __bu.__o); + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9597,7 +9597,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_f32 (float32_t * __a, float32x4x3_t __b) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3v4sf (__a, __bu.__o); + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9660,7 +9660,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9716,7 +9716,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const 
int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9768,7 +9768,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4v2sf (__a); + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9856,7 +9856,7 @@ __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_f32 (const float32_t * __a) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4v4sf (__a); + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9932,7 +9932,7 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10004,7 +10004,7 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10063,7 +10063,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_dup_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_dupv2sf (__a); + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10148,7 +10148,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_f32 (float32_t * __a, float32x2x4_t __b) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2sf (__a, __bu.__o); + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10225,7 +10225,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_f32 (float32_t * __a, float32x4x4_t __b) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4sf (__a, __bu.__o); + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10288,7 +10288,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10344,7 +10344,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - 
__builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index 8d3afbf55fd..ba206022b75 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -30,7 +30,7 @@ /* Section 4.1 of the AAPCS requires the use of VFP format. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" /* TARGET_BIG_ENDIAN_DEFAULT is set in config.gcc for big endian configurations. */ diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md index fcd92b002d7..515de43d28b 100644 --- a/gcc/config/arm/fpa.md +++ b/gcc/config/arm/fpa.md @@ -599,10 +599,10 @@ { default: case 0: return \"mvf%?e\\t%0, %1\"; - case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 1: if (TARGET_FPA_EMU2) return \"ldf%?e\\t%0, %1\"; return \"lfm%?\\t%0, 1, %1\"; - case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 2: if (TARGET_FPA_EMU2) return \"stf%?e\\t%1, %0\"; return \"sfm%?\\t%1, 1, %0\"; } diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h index 07455ee87fd..9fdca414e8e 100644 --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -98,7 +98,7 @@ /* NWFPE always understands FPA instructions. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3 +#define FPUTYPE_DEFAULT "fpe3" /* Call the function profiler with a given profile label. */ #undef ARM_FUNCTION_PROFILER diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml index 9c8e2a89b86..112c8be6e3b 100644 --- a/gcc/config/arm/neon-gen.ml +++ b/gcc/config/arm/neon-gen.ml @@ -122,6 +122,7 @@ let rec signed_ctype = function | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) @@ -320,7 +321,7 @@ let deftypes () = typeinfo; Format.print_newline (); (* Extra types not in <stdint.h>. *) - Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef float float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 10393b33ebc..114097d22a7 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -50,7 +50,7 @@ type vectype = T_int8x8 | T_int8x16 | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI + | T_intDI | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). @@ -1693,6 +1693,7 @@ let string_of_vectype vt = | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in affix (Printf.sprintf "%sx%d" basename num) diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h index 4c06fa1cb3b..9cf186b338d 100644 --- a/gcc/config/arm/netbsd-elf.h +++ b/gcc/config/arm/netbsd-elf.h @@ -153,5 +153,5 @@ do \ while (0) #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 8879fedb7d7..aa7e197bc5d 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -97,7 +97,7 @@ along with GCC; see the file COPYING3. 
If not see /* There is no default multilib. */ #undef MULTILIB_DEFAULTS -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" #undef FUNCTION_PROFILER #define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 0927e3928c1..782ad11627b 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -406,8 +406,6 @@ extern int avr_reg_order[]; #define HAVE_POST_INCREMENT 1 #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #define REG_OK_FOR_BASE_NOSTRICT_P(X) \ diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 03a279036f3..365680ee9fa 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -911,9 +911,6 @@ typedef struct { /* Addressing Modes */ -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Nonzero if the constant value X is a legitimate general operand. symbol_ref are not legitimate and will be put into constant pool. See force_const_mem(). diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index 0fea7d77b39..3c426b74ae5 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -950,8 +950,6 @@ struct cum_args {int regs;}; #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Must be a compile-time constant, so we go with the highest value among all CRIS variants. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h index 20e157173d8..5e6237895b5 100644 --- a/gcc/config/fr30/fr30.h +++ b/gcc/config/fr30/fr30.h @@ -741,16 +741,6 @@ enum reg_class /*}}}*/ /*{{{ Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */ diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h index d48aa1ef17d..d5a7a4a6670 100644 --- a/gcc/config/frv/frv.h +++ b/gcc/config/frv/frv.h @@ -1927,16 +1927,6 @@ __asm__("\n" \ /* Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. 
*/ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 49acfa780e4..21f0e3184ef 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -46,9 +46,11 @@ /* Extended Features */ /* %ecx */ +#define bit_FMA4 (1 << 16) #define bit_LAHF_LM (1 << 0) +#define bit_LWP (1 << 15) #define bit_SSE4a (1 << 6) -#define bit_FMA4 (1 << 16) +#define bit_XOP (1 << 11) /* %edx */ #define bit_LM (1 << 29) diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h index 42782ade0ed..2bd411a0f05 100644 --- a/gcc/config/i386/fma4intrin.h +++ b/gcc/config/i386/fma4intrin.h @@ -35,15 +35,6 @@ /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ #include <ammintrin.h> -/* Internal data types for implementing the intrinsics. */ -typedef float __v8sf __attribute__ ((__vector_size__ (32))); -typedef double __v4df __attribute__ ((__vector_size__ (32))); - -typedef float __m256 __attribute__ ((__vector_size__ (32), - __may_alias__)); -typedef double __m256d __attribute__ ((__vector_size__ (32), - __may_alias__)); - /* 128b Floating point multiply/add type instructions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 12a3f1759a8..5a5311fba0f 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -232,6 +232,10 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__SSE4A__"); if (isa_flag & OPTION_MASK_ISA_FMA4) def_or_undef (parse_in, "__FMA4__"); + if (isa_flag & OPTION_MASK_ISA_XOP) + def_or_undef (parse_in, "__XOP__"); + if (isa_flag & OPTION_MASK_ISA_LWP) + def_or_undef (parse_in, "__LWP__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) def_or_undef (parse_in, "__SSE_MATH__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c29a7848ae1..2031dfb6e98 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1964,6 +1964,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_FMA4_SET \ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \ | OPTION_MASK_ISA_AVX_SET) +#define OPTION_MASK_ISA_XOP_SET \ + (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET) +#define OPTION_MASK_ISA_LWP_SET \ + OPTION_MASK_ISA_LWP /* AES and PCLMUL need SSE2 because they use xmm registers */ #define OPTION_MASK_ISA_AES_SET \ @@ -2015,7 +2019,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE4A_UNSET \ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET) -#define OPTION_MASK_ISA_FMA4_UNSET OPTION_MASK_ISA_FMA4 +#define OPTION_MASK_ISA_FMA4_UNSET \ + (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET) +#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP +#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL @@ -2263,6 +2270,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mxop: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET; + } + return true; + + case OPT_mlwp: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET; + } + else + { + 
ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET; + } + return true; + case OPT_mabm: if (value) { @@ -2391,6 +2424,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { { "-m64", OPTION_MASK_ISA_64BIT }, { "-mfma4", OPTION_MASK_ISA_FMA4 }, + { "-mxop", OPTION_MASK_ISA_XOP }, + { "-mlwp", OPTION_MASK_ISA_LWP }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2621,7 +2656,9 @@ override_options (bool main_args_p) PTA_AVX = 1 << 18, PTA_FMA = 1 << 19, PTA_MOVBE = 1 << 20, - PTA_FMA4 = 1 << 21 + PTA_FMA4 = 1 << 21, + PTA_XOP = 1 << 22, + PTA_LWP = 1 << 23 }; static struct pta @@ -2967,6 +3004,12 @@ override_options (bool main_args_p) if (processor_alias_table[i].flags & PTA_FMA4 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) ix86_isa_flags |= OPTION_MASK_ISA_FMA4; + if (processor_alias_table[i].flags & PTA_XOP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) + ix86_isa_flags |= OPTION_MASK_ISA_XOP; + if (processor_alias_table[i].flags & PTA_LWP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) + ix86_isa_flags |= OPTION_MASK_ISA_LWP; if (processor_alias_table[i].flags & PTA_ABM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) ix86_isa_flags |= OPTION_MASK_ISA_ABM; @@ -3649,6 +3692,8 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) IX86_ATTR_ISA ("sse4a", OPT_msse4a), IX86_ATTR_ISA ("ssse3", OPT_mssse3), IX86_ATTR_ISA ("fma4", OPT_mfma4), + IX86_ATTR_ISA ("xop", OPT_mxop), + IX86_ATTR_ISA ("lwp", OPT_mlwp), /* string options */ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), @@ -11290,6 +11335,7 @@ get_some_local_dynamic_name (void) X -- don't print any sort of PIC '@' suffix for a symbol. & -- print some in-use local-dynamic symbol name. H -- print a memory address offset by 8; used for sse high-parts + Y -- print condition for XOP pcom* instruction. + -- print a branch hint as 'cs' or 'ds' prefix ; -- print a semicolon (after prefixes due to bug in older gas). */ @@ -11707,6 +11753,61 @@ print_operand (FILE *file, rtx x, int code) return; } + case 'Y': + switch (GET_CODE (x)) + { + case NE: + fputs ("neq", file); + break; + case EQ: + fputs ("eq", file); + break; + case GE: + case GEU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); + break; + case GT: + case GTU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? 
"gt" : "unle", file); + break; + case LE: + case LEU: + fputs ("le", file); + break; + case LT: + case LTU: + fputs ("lt", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case ORDERED: + fputs ("ord", file); + break; + case UNEQ: + fputs ("ueq", file); + break; + case UNGE: + fputs ("nlt", file); + break; + case UNGT: + fputs ("nle", file); + break; + case UNLE: + fputs ("ule", file); + break; + case UNLT: + fputs ("ult", file); + break; + case LTGT: + fputs ("une", file); + break; + default: + output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); + return; + } + return; + case ';': #if TARGET_MACHO fputs (" ; ", file); @@ -15916,6 +16017,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } + else if (TARGET_XOP) + { + rtx pcmov = gen_rtx_SET (mode, dest, + gen_rtx_IF_THEN_ELSE (mode, cmp, + op_true, + op_false)); + emit_insn (pcmov); + } else { op_true = force_reg (mode, op_true); @@ -16038,6 +16147,9 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = operands[4]; cop1 = operands[5]; + /* XOP supports all of the comparisons on all vector int types. */ + if (!TARGET_XOP) + { /* Canonicalize the comparison to EQ, GT, GTU. */ switch (code) { @@ -16148,6 +16260,7 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = x; cop1 = CONST0_RTX (mode); } + } x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, operands[1+negate], operands[2-negate]); @@ -20806,7 +20919,7 @@ enum ix86_builtins IX86_BUILTIN_CVTUDQ2PS, - /* FMA4 instructions. */ + /* FMA4 and XOP instructions. */ IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDPS, @@ -20839,6 +20952,164 @@ enum ix86_builtins IX86_BUILTIN_VFNMADDPD256, IX86_BUILTIN_VFNMSUBPS256, IX86_BUILTIN_VFNMSUBPD256, + + IX86_BUILTIN_VPCMOV, + IX86_BUILTIN_VPCMOV_V2DI, + IX86_BUILTIN_VPCMOV_V4SI, + IX86_BUILTIN_VPCMOV_V8HI, + IX86_BUILTIN_VPCMOV_V16QI, + IX86_BUILTIN_VPCMOV_V4SF, + IX86_BUILTIN_VPCMOV_V2DF, + IX86_BUILTIN_VPCMOV256, + IX86_BUILTIN_VPCMOV_V4DI256, + IX86_BUILTIN_VPCMOV_V8SI256, + IX86_BUILTIN_VPCMOV_V16HI256, + IX86_BUILTIN_VPCMOV_V32QI256, + IX86_BUILTIN_VPCMOV_V8SF256, + IX86_BUILTIN_VPCMOV_V4DF256, + + IX86_BUILTIN_VPPERM, + + IX86_BUILTIN_VPMACSSWW, + IX86_BUILTIN_VPMACSWW, + IX86_BUILTIN_VPMACSSWD, + IX86_BUILTIN_VPMACSWD, + IX86_BUILTIN_VPMACSSDD, + IX86_BUILTIN_VPMACSDD, + IX86_BUILTIN_VPMACSSDQL, + IX86_BUILTIN_VPMACSSDQH, + IX86_BUILTIN_VPMACSDQL, + IX86_BUILTIN_VPMACSDQH, + IX86_BUILTIN_VPMADCSSWD, + IX86_BUILTIN_VPMADCSWD, + + IX86_BUILTIN_VPHADDBW, + IX86_BUILTIN_VPHADDBD, + IX86_BUILTIN_VPHADDBQ, + IX86_BUILTIN_VPHADDWD, + IX86_BUILTIN_VPHADDWQ, + IX86_BUILTIN_VPHADDDQ, + IX86_BUILTIN_VPHADDUBW, + IX86_BUILTIN_VPHADDUBD, + IX86_BUILTIN_VPHADDUBQ, + IX86_BUILTIN_VPHADDUWD, + IX86_BUILTIN_VPHADDUWQ, + IX86_BUILTIN_VPHADDUDQ, + IX86_BUILTIN_VPHSUBBW, + IX86_BUILTIN_VPHSUBWD, + IX86_BUILTIN_VPHSUBDQ, + + IX86_BUILTIN_VPROTB, + IX86_BUILTIN_VPROTW, + IX86_BUILTIN_VPROTD, + IX86_BUILTIN_VPROTQ, + IX86_BUILTIN_VPROTB_IMM, + IX86_BUILTIN_VPROTW_IMM, + IX86_BUILTIN_VPROTD_IMM, + IX86_BUILTIN_VPROTQ_IMM, + + IX86_BUILTIN_VPSHLB, + IX86_BUILTIN_VPSHLW, + IX86_BUILTIN_VPSHLD, + IX86_BUILTIN_VPSHLQ, + IX86_BUILTIN_VPSHAB, + IX86_BUILTIN_VPSHAW, + IX86_BUILTIN_VPSHAD, + IX86_BUILTIN_VPSHAQ, + + IX86_BUILTIN_VFRCZSS, + IX86_BUILTIN_VFRCZSD, + IX86_BUILTIN_VFRCZPS, + IX86_BUILTIN_VFRCZPD, + IX86_BUILTIN_VFRCZPS256, + IX86_BUILTIN_VFRCZPD256, + + IX86_BUILTIN_VPCOMEQUB, + 
IX86_BUILTIN_VPCOMNEUB, + IX86_BUILTIN_VPCOMLTUB, + IX86_BUILTIN_VPCOMLEUB, + IX86_BUILTIN_VPCOMGTUB, + IX86_BUILTIN_VPCOMGEUB, + IX86_BUILTIN_VPCOMFALSEUB, + IX86_BUILTIN_VPCOMTRUEUB, + + IX86_BUILTIN_VPCOMEQUW, + IX86_BUILTIN_VPCOMNEUW, + IX86_BUILTIN_VPCOMLTUW, + IX86_BUILTIN_VPCOMLEUW, + IX86_BUILTIN_VPCOMGTUW, + IX86_BUILTIN_VPCOMGEUW, + IX86_BUILTIN_VPCOMFALSEUW, + IX86_BUILTIN_VPCOMTRUEUW, + + IX86_BUILTIN_VPCOMEQUD, + IX86_BUILTIN_VPCOMNEUD, + IX86_BUILTIN_VPCOMLTUD, + IX86_BUILTIN_VPCOMLEUD, + IX86_BUILTIN_VPCOMGTUD, + IX86_BUILTIN_VPCOMGEUD, + IX86_BUILTIN_VPCOMFALSEUD, + IX86_BUILTIN_VPCOMTRUEUD, + + IX86_BUILTIN_VPCOMEQUQ, + IX86_BUILTIN_VPCOMNEUQ, + IX86_BUILTIN_VPCOMLTUQ, + IX86_BUILTIN_VPCOMLEUQ, + IX86_BUILTIN_VPCOMGTUQ, + IX86_BUILTIN_VPCOMGEUQ, + IX86_BUILTIN_VPCOMFALSEUQ, + IX86_BUILTIN_VPCOMTRUEUQ, + + IX86_BUILTIN_VPCOMEQB, + IX86_BUILTIN_VPCOMNEB, + IX86_BUILTIN_VPCOMLTB, + IX86_BUILTIN_VPCOMLEB, + IX86_BUILTIN_VPCOMGTB, + IX86_BUILTIN_VPCOMGEB, + IX86_BUILTIN_VPCOMFALSEB, + IX86_BUILTIN_VPCOMTRUEB, + + IX86_BUILTIN_VPCOMEQW, + IX86_BUILTIN_VPCOMNEW, + IX86_BUILTIN_VPCOMLTW, + IX86_BUILTIN_VPCOMLEW, + IX86_BUILTIN_VPCOMGTW, + IX86_BUILTIN_VPCOMGEW, + IX86_BUILTIN_VPCOMFALSEW, + IX86_BUILTIN_VPCOMTRUEW, + + IX86_BUILTIN_VPCOMEQD, + IX86_BUILTIN_VPCOMNED, + IX86_BUILTIN_VPCOMLTD, + IX86_BUILTIN_VPCOMLED, + IX86_BUILTIN_VPCOMGTD, + IX86_BUILTIN_VPCOMGED, + IX86_BUILTIN_VPCOMFALSED, + IX86_BUILTIN_VPCOMTRUED, + + IX86_BUILTIN_VPCOMEQQ, + IX86_BUILTIN_VPCOMNEQ, + IX86_BUILTIN_VPCOMLTQ, + IX86_BUILTIN_VPCOMLEQ, + IX86_BUILTIN_VPCOMGTQ, + IX86_BUILTIN_VPCOMGEQ, + IX86_BUILTIN_VPCOMFALSEQ, + IX86_BUILTIN_VPCOMTRUEQ, + + /* LWP instructions. */ + IX86_BUILTIN_LLWPCB16, + IX86_BUILTIN_LLWPCB32, + IX86_BUILTIN_LLWPCB64, + IX86_BUILTIN_SLWPCB16, + IX86_BUILTIN_SLWPCB32, + IX86_BUILTIN_SLWPCB64, + IX86_BUILTIN_LWPVAL16, + IX86_BUILTIN_LWPVAL32, + IX86_BUILTIN_LWPVAL64, + IX86_BUILTIN_LWPINS16, + IX86_BUILTIN_LWPINS32, + IX86_BUILTIN_LWPINS64, + IX86_BUILTIN_MAX }; @@ -21052,7 +21323,13 @@ enum ix86_special_builtin_type VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, VOID_FTYPE_PV4SF_V4SF_V4SF, - VOID_FTYPE_PV2DF_V2DF_V2DF + VOID_FTYPE_PV2DF_V2DF_V2DF, + VOID_FTYPE_USHORT_UINT_USHORT, + VOID_FTYPE_UINT_UINT_UINT, + VOID_FTYPE_UINT64_UINT_UINT, + UCHAR_FTYPE_USHORT_UINT_USHORT, + UCHAR_FTYPE_UINT_UINT_UINT, + UCHAR_FTYPE_UINT64_UINT_UINT }; /* Builtin types */ @@ -21299,6 +21576,22 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, 
CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, + }; /* Builtins with variable number of arguments. */ @@ -21912,13 +22205,58 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, }; -/* FMA4. */ +/* FMA4 and XOP. */ enum multi_arg_type { MULTI_ARG_UNKNOWN, MULTI_ARG_3_SF, MULTI_ARG_3_DF, MULTI_ARG_3_SF2, - MULTI_ARG_3_DF2 + MULTI_ARG_3_DF2, + MULTI_ARG_3_DI, + MULTI_ARG_3_SI, + MULTI_ARG_3_SI_DI, + MULTI_ARG_3_HI, + MULTI_ARG_3_HI_SI, + MULTI_ARG_3_QI, + MULTI_ARG_3_DI2, + MULTI_ARG_3_SI2, + MULTI_ARG_3_HI2, + MULTI_ARG_3_QI2, + MULTI_ARG_2_SF, + MULTI_ARG_2_DF, + MULTI_ARG_2_DI, + MULTI_ARG_2_SI, + MULTI_ARG_2_HI, + MULTI_ARG_2_QI, + MULTI_ARG_2_DI_IMM, + MULTI_ARG_2_SI_IMM, + MULTI_ARG_2_HI_IMM, + MULTI_ARG_2_QI_IMM, + MULTI_ARG_2_DI_CMP, + MULTI_ARG_2_SI_CMP, + MULTI_ARG_2_HI_CMP, + MULTI_ARG_2_QI_CMP, + MULTI_ARG_2_DI_TF, + MULTI_ARG_2_SI_TF, + MULTI_ARG_2_HI_TF, + MULTI_ARG_2_QI_TF, + MULTI_ARG_2_SF_TF, + MULTI_ARG_2_DF_TF, + MULTI_ARG_1_SF, + MULTI_ARG_1_DF, + MULTI_ARG_1_SF2, + MULTI_ARG_1_DF2, + MULTI_ARG_1_DI, + MULTI_ARG_1_SI, + MULTI_ARG_1_HI, + MULTI_ARG_1_QI, + MULTI_ARG_1_SI_DI, + MULTI_ARG_1_HI_DI, + MULTI_ARG_1_HI_SI, + MULTI_ARG_1_QI_DI, + MULTI_ARG_1_QI_SI, + MULTI_ARG_1_QI_HI + }; static const struct builtin_description bdesc_multi_arg[] = @@ -21959,7 +22297,160 @@ static const struct builtin_description bdesc_multi_arg[] = { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, - { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 } + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", 
IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", 
IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, 
"__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, }; @@ -22341,51 +22832,6 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); - - tree v2di_ftype_v2di - = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); - - tree v16qi_ftype_v8hi_v8hi - = build_function_type_list (V16QI_type_node, - V8HI_type_node, V8HI_type_node, - NULL_TREE); - tree v8hi_ftype_v4si_v4si - = build_function_type_list (V8HI_type_node, - V4SI_type_node, V4SI_type_node, - NULL_TREE); - tree v8hi_ftype_v16qi_v16qi - = build_function_type_list (V8HI_type_node, - V16QI_type_node, V16QI_type_node, - NULL_TREE); - tree v4hi_ftype_v8qi_v8qi - = build_function_type_list (V4HI_type_node, - V8QI_type_node, V8QI_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_uchar - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_char_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_ushort - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_unsigned - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - tree uint64_ftype_uint64_uint64 - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - NULL_TREE); - tree float_ftype_float - = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); - /* AVX builtins */ tree V32QI_type_node = build_vector_type_for_mode (char_type_node, V32QImode); @@ -22397,6 +22843,8 @@ ix86_init_mmx_sse_builtins (void) V4DImode); tree V4DF_type_node = build_vector_type_for_mode (double_type_node, V4DFmode); + tree V16HI_type_node = build_vector_type_for_mode (intHI_type_node, + V16HImode); tree v8sf_ftype_v8sf = build_function_type_list (V8SF_type_node, V8SF_type_node, @@ -22641,6 +23089,138 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DI_type_node, NULL_TREE); + /* XOP instructions */ + tree v2di_ftype_v2di_v2di_v2di + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v4di_ftype_v4di_v4di_v4di + = build_function_type_list (V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v4si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v8si_ftype_v8si_v8si_v8si + = build_function_type_list (V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + NULL_TREE); + + tree 
v32qi_ftype_v32qi_v32qi_v32qi + = build_function_type_list (V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v2di + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v8hi + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + NULL_TREE); + + tree v16hi_ftype_v16hi_v16hi_v16hi + = build_function_type_list (V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v4si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v2di_ftype_v2di_si + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + integer_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + integer_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + integer_type_node, + NULL_TREE); + + tree v16qi_ftype_v16qi_si + = build_function_type_list (V16QI_type_node, + V16QI_type_node, + integer_type_node, + NULL_TREE); + + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); + + tree v16qi_ftype_v8hi_v8hi + = build_function_type_list (V16QI_type_node, + V8HI_type_node, V8HI_type_node, + NULL_TREE); + tree v8hi_ftype_v4si_v4si + = build_function_type_list (V8HI_type_node, + V4SI_type_node, V4SI_type_node, + NULL_TREE); + tree v8hi_ftype_v16qi_v16qi + = build_function_type_list (V8HI_type_node, + V16QI_type_node, V16QI_type_node, + NULL_TREE); + tree v4hi_ftype_v8qi_v8qi + = build_function_type_list (V4HI_type_node, + V8QI_type_node, V8QI_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_uchar + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_char_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_ushort + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_unsigned + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree uint64_ftype_uint64_uint64 + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + tree float_ftype_float + = build_function_type_list (float_type_node, + float_type_node, + NULL_TREE); + /* Integer intrinsics. */ tree uint64_ftype_void = build_function_type (long_long_unsigned_type_node, @@ -22670,6 +23250,50 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); + /* LWP instructions. 
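The type nodes built just below give these builtins their C-level signatures; read together with the bdesc_special_args rows above, the user-visible prototypes come out as, for example (a sketch inferred from the VOID_FTYPE_ and UCHAR_FTYPE_ type codes in this patch, not a separately documented API): void __builtin_ia32_lwpval32 (unsigned int, unsigned int, unsigned int); unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int); the lwpins forms return the instruction's flag result as an unsigned char. 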
*/ + + tree void_ftype_ushort_unsigned_ushort + = build_function_type_list (void_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree void_ftype_unsigned_unsigned_unsigned + = build_function_type_list (void_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree void_ftype_uint64_unsigned_unsigned + = build_function_type_list (void_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_ushort_unsigned_ushort + = build_function_type_list (unsigned_char_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_unsigned_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_uint64_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree ftype; /* Add all special builtins with variable number of operands. */ @@ -22783,6 +23407,25 @@ ix86_init_mmx_sse_builtins (void) case VOID_FTYPE_PV2DF_V2DF_V2DF: type = void_ftype_pv2df_v2df_v2df; break; + case VOID_FTYPE_USHORT_UINT_USHORT: + type = void_ftype_ushort_unsigned_ushort; + break; + case VOID_FTYPE_UINT_UINT_UINT: + type = void_ftype_unsigned_unsigned_unsigned; + break; + case VOID_FTYPE_UINT64_UINT_UINT: + type = void_ftype_uint64_unsigned_unsigned; + break; + case UCHAR_FTYPE_USHORT_UINT_USHORT: + type = uchar_ftype_ushort_unsigned_ushort; + break; + case UCHAR_FTYPE_UINT_UINT_UINT: + type = uchar_ftype_unsigned_unsigned_unsigned; + break; + case UCHAR_FTYPE_UINT64_UINT_UINT: + type = uchar_ftype_uint64_unsigned_unsigned; + break; + default: gcc_unreachable (); } @@ -23409,6 +24052,50 @@ ix86_init_mmx_sse_builtins (void) case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; case MULTI_ARG_3_SF2: mtype = v8sf_ftype_v8sf_v8sf_v8sf; break; case MULTI_ARG_3_DF2: mtype = v4df_ftype_v4df_v4df_v4df; break; + case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; + case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; + case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; + case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; + case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; + case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; + case MULTI_ARG_3_DI2: mtype = v4di_ftype_v4di_v4di_v4di; break; + case MULTI_ARG_3_SI2: mtype = v8si_ftype_v8si_v8si_v8si; break; + case MULTI_ARG_3_HI2: mtype = v16hi_ftype_v16hi_v16hi_v16hi; break; + case MULTI_ARG_3_QI2: mtype = v32qi_ftype_v32qi_v32qi_v32qi; break; + case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; + case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; + case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; + case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; + case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_CMP: mtype = 
v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; + case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; + case MULTI_ARG_1_SF2: mtype = v8sf_ftype_v8sf; break; + case MULTI_ARG_1_DF2: mtype = v4df_ftype_v4df; break; + case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; + case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; + case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; + case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; + case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; + case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; + case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; + case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; + case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; + case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; case MULTI_ARG_UNKNOWN: default: @@ -23628,9 +24315,71 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, case MULTI_ARG_3_DF: case MULTI_ARG_3_SF2: case MULTI_ARG_3_DF2: + case MULTI_ARG_3_DI: + case MULTI_ARG_3_SI: + case MULTI_ARG_3_SI_DI: + case MULTI_ARG_3_HI: + case MULTI_ARG_3_HI_SI: + case MULTI_ARG_3_QI: + case MULTI_ARG_3_DI2: + case MULTI_ARG_3_SI2: + case MULTI_ARG_3_HI2: + case MULTI_ARG_3_QI2: nargs = 3; break; + case MULTI_ARG_2_SF: + case MULTI_ARG_2_DF: + case MULTI_ARG_2_DI: + case MULTI_ARG_2_SI: + case MULTI_ARG_2_HI: + case MULTI_ARG_2_QI: + nargs = 2; + break; + + case MULTI_ARG_2_DI_IMM: + case MULTI_ARG_2_SI_IMM: + case MULTI_ARG_2_HI_IMM: + case MULTI_ARG_2_QI_IMM: + nargs = 2; + last_arg_constant = true; + break; + + case MULTI_ARG_1_SF: + case MULTI_ARG_1_DF: + case MULTI_ARG_1_SF2: + case MULTI_ARG_1_DF2: + case MULTI_ARG_1_DI: + case MULTI_ARG_1_SI: + case MULTI_ARG_1_HI: + case MULTI_ARG_1_QI: + case MULTI_ARG_1_SI_DI: + case MULTI_ARG_1_HI_DI: + case MULTI_ARG_1_HI_SI: + case MULTI_ARG_1_QI_DI: + case MULTI_ARG_1_QI_SI: + case MULTI_ARG_1_QI_HI: + nargs = 1; + break; + + case MULTI_ARG_2_DI_CMP: + case MULTI_ARG_2_SI_CMP: + case MULTI_ARG_2_HI_CMP: + case MULTI_ARG_2_QI_CMP: + nargs = 2; + comparison_p = true; + break; + + case MULTI_ARG_2_SF_TF: + case MULTI_ARG_2_DF_TF: + case MULTI_ARG_2_DI_TF: + case MULTI_ARG_2_SI_TF: + case MULTI_ARG_2_HI_TF: + case MULTI_ARG_2_QI_TF: + nargs = 2; + tf_p = true; + break; + case MULTI_ARG_UNKNOWN: default: gcc_unreachable (); @@ -24568,6 +25317,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, /* Reserve memory operand for target. */ memory = ARRAY_SIZE (args); break; + case VOID_FTYPE_USHORT_UINT_USHORT: + case VOID_FTYPE_UINT_UINT_UINT: + case VOID_FTYPE_UINT64_UINT_UINT: + case UCHAR_FTYPE_USHORT_UINT_USHORT: + case UCHAR_FTYPE_UINT_UINT_UINT: + case UCHAR_FTYPE_UINT64_UINT_UINT: + nargs = 3; + klass = store; + memory = 0; + break; default: gcc_unreachable (); } @@ -25311,7 +26070,7 @@ static tree ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool sqrt ATTRIBUTE_UNUSED) { - if (! 
(TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () + if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations)) return NULL_TREE; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b412604dbd8..4bc8ef18500 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -55,6 +55,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_FMA OPTION_ISA_FMA #define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_FMA4 OPTION_ISA_FMA4 +#define TARGET_XOP OPTION_ISA_XOP +#define TARGET_LWP OPTION_ISA_LWP #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM #define TARGET_POPCNT OPTION_ISA_POPCNT diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dc605abde06..82f5352597c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -57,6 +57,7 @@ ;; X -- don't print any sort of PIC '@' suffix for a symbol. ;; & -- print some in-use local-dynamic symbol name. ;; H -- print a memory address offset by 8; used for sse high-parts +;; Y -- print condition for XOP pcom* instruction. ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). @@ -199,6 +200,15 @@ (UNSPEC_FMA4_INTRINSIC 150) (UNSPEC_FMA4_FMADDSUB 151) (UNSPEC_FMA4_FMSUBADD 152) + (UNSPEC_XOP_UNSIGNED_CMP 151) + (UNSPEC_XOP_TRUEFALSE 152) + (UNSPEC_XOP_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_LLWP_INTRINSIC 155) + (UNSPEC_SLWP_INTRINSIC 156) + (UNSPECV_LWPVAL_INTRINSIC 157) + (UNSPECV_LWPINS_INTRINSIC 158) + ; For AES support (UNSPEC_AESENC 159) (UNSPEC_AESENCLAST 160) @@ -254,6 +264,20 @@ (COM_TRUE_P 5) ]) +;; Constants used in the XOP pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + ;; Registers by name. (define_constants [(AX_REG 0) @@ -333,7 +357,7 @@ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, - ssemuladd,sse4arg, + ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -19676,6 +19700,20 @@ [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) +;; All moves in XOP pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; XOP conditional move +(define_insn "*xop_pcmov_<mode>" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "register_operand" "x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + ;; These versions of the min/max patterns are intentionally ignorant of ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator @@ -19985,6 +20023,18 @@ DONE; }) +;; Use IOR for stack probes, this is shorter. +(define_expand "probe_stack" + [(match_operand 0 "memory_operand" "")] + "" +{ + if (GET_MODE (operands[0]) == DImode) + emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx)); + else + emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx)); + DONE; +}) + (define_expand "builtin_setjmp_receiver" [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" @@ -20488,7 +20538,9 @@ [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) @@ -20503,7 +20555,9 @@ [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) @@ -21252,19 +21306,19 @@ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" ""))) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) - (clobber (reg:TI 27)) - (clobber (reg:TI 28)) - (clobber (reg:TI 45)) - (clobber (reg:TI 46)) - (clobber (reg:TI 47)) - (clobber (reg:TI 48)) - (clobber (reg:TI 49)) - (clobber (reg:TI 50)) - (clobber (reg:TI 51)) - (clobber (reg:TI 52)) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] - "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "TARGET_64BIT && !SIBLING_CALL_P (insn)" { if (constant_call_address_operand (operands[1], Pmode)) return "call\t%P1"; @@ -21303,14 +21357,14 @@ (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -21320,14 +21374,14 @@ (define_insn "*sse_prologue_save_insn" [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 
"register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] @@ -21804,6 +21858,120 @@ [(set_attr "type" "other") (set_attr "length" "3")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; LWP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "lwp_llwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_llwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_llwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_slwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_slwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_slwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpvalhi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpvalsi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpvaldi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpinshi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpinssi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpinsdi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + (include 
"mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9668ff6504d..dd47b7d1dc5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -314,6 +314,14 @@ mfma4 Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save Support FMA4 built-in functions and code generation +mxop +Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save +Support XOP built-in functions and code generation + +mlwp +Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save +Support LWP built-in functions and code generation + mabm Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Support code generation of Advanced Bit Manipulation (ABM) instructions. diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index e701b19e2a8..540bc3f09ee 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -49,6 +49,7 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } +#ifdef __SSE4_2__ /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -70,6 +71,7 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } +#endif /* SSE4.2 */ /* 32bit popcnt */ extern __inline int diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h index 5e2e0136fcb..5d8e5ad2cbe 100644 --- a/gcc/config/i386/linux.h +++ b/gcc/config/i386/linux.h @@ -207,6 +207,9 @@ along with GCC; see the file COPYING3. If not see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index cfa3f49e870..d07547a804f 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -110,6 +110,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h new file mode 100644 index 00000000000..e5137ec24f4 --- /dev/null +++ b/gcc/config/i386/lwpintrin.h @@ -0,0 +1,109 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _LWPINTRIN_H_INCLUDED +#define _LWPINTRIN_H_INCLUDED + +#ifndef __LWP__ +# error "LWP instruction set not enabled" +#else + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb16 (void *pcbAddress) +{ + __builtin_ia32_llwpcb16 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb32 (void *pcbAddress) +{ + __builtin_ia32_llwpcb32 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb64 (void *pcbAddress) +{ + __builtin_ia32_llwpcb64 (pcbAddress); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb16 (void) +{ + return __builtin_ia32_slwpcb16 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb32 (void) +{ + return __builtin_ia32_slwpcb32 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb64 (void) +{ + return __builtin_ia32_slwpcb64 (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + __builtin_ia32_lwpval16 (data2, data1, flags); +} +/* +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval32 (data2, data1, flags); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval64 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + return __builtin_ia32_lwpins16 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins32 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins64 (data2, data1, flags); +} +*/ +#endif /* __LWP__ */ + +#endif /* _LWPINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e90296512ad..bad39bb69c8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -86,6 +86,9 @@ (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) +;; Mapping of the max integer size for xop rotate immediate constraint +(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) + ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") (V16QI "QI") (V8HI "HI") @@ -1455,7 +1458,8 @@ (match_operator:SSEMODEF4 3 "sse_comparison_operator" [(match_operand:SSEMODEF4 1 "register_operand" "0") (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" + "!TARGET_XOP + && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" 
"cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -5614,7 +5618,7 @@ (match_operand:V4SI 2 "register_operand" "")))] "TARGET_SSE2" { - if (TARGET_SSE4_1) + if (TARGET_SSE4_1 || TARGET_XOP) ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); }) @@ -5643,7 +5647,7 @@ [(set (match_operand:V4SI 0 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE4_1 + "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP && can_create_pseudo_p ()" "#" "&& 1" @@ -5705,6 +5709,42 @@ rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx op0, op1, op2; + if (TARGET_XOP) + { + /* op1: A,B,C,D, op2: E,F,G,H */ + op0 = operands[0]; + op1 = gen_lowpart (V4SImode, operands[1]); + op2 = gen_lowpart (V4SImode, operands[2]); + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + t4 = gen_reg_rtx (V2DImode); + t5 = gen_reg_rtx (V2DImode); + + /* t1: B,A,D,C */ + emit_insn (gen_sse2_pshufd_1 (t1, op1, + GEN_INT (1), + GEN_INT (0), + GEN_INT (3), + GEN_INT (2))); + + /* t2: 0 */ + emit_move_insn (t2, CONST0_RTX (V4SImode)); + + /* t3: (B*E),(A*F),(D*G),(C*H) */ + emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2)); + + /* t4: (B*E)+(A*F), (D*G)+(C*H) */ + emit_insn (gen_xop_phadddq (t4, t3)); + + /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ + emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); + + /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ + emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5)); + DONE; + } + op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; @@ -5820,6 +5860,56 @@ DONE; }) +(define_expand "vec_widen_smult_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + DONE; +}) + (define_expand "vec_widen_umult_hi_v4si" [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") @@ -6217,7 +6307,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP " "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*avx_eq<mode>3" @@ -6240,7 +6330,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE2 && !TARGET_XOP && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") @@ -6286,7 +6376,7 @@ (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP" "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "1") @@ -10364,6 +10454,1445 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; XOP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; XOP parallel integer multiply/add instructions. +;; Note the instruction does not allow the value being added to be a memory +;; operation. However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (plus:V8HI + (mult:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") + (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsww with two memory operands into a load and the pmacsww. +(define_split + [(set (match_operand:V8HI 0 "register_operand" "") + (plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")) + (match_operand:V8HI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V8HImode); + emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (ss_plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Note the instruction does not allow the value being added to be a memory +;; operation. 
However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsdd with two memory operands into a load and the pmacsdd. +(define_split + [(set (match_operand:V4SI 0 "register_operand" "") + (plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:V4SI 2 "nonimmediate_operand" "")) + (match_operand:V4SI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V4SImode); + emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)]))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn 
"xop_pmacsdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdql_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as operands 1/2 +(define_insn_and_split "xop_mulv2div2di3_low" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacsdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdqh_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as either operands[1] or operands[2]. +(define_insn_and_split "xop_mulv2div2di3_high" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel integer multiply/add instructions for the intrinsics +(define_insn "xop_pmacsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) 
+ (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel XMM conditional moves +(define_insn "xop_pcmov_<mode>" + [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x") + (if_then_else:SSEMODE + (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,x") + (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +(define_insn "xop_pcmov_<mode>256" + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") + (if_then_else:AVX256MODE + (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:AVX256MODE 1 "vector_move_operand" "x,xm,x") + (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +;; XOP horizontal add/subtract instructions +(define_insn "xop_phaddbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + 
(sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddbd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddbq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphaddwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphaddwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadddq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphadddq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (zero_extend:V8HI + 
(vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddubw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddubd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddubq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphadduwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphadduwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddudq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 
2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphaddudq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (minus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphsubbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (minus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphsubwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (minus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphsubdq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +;; XOP permute instructions +(define_insn "xop_pperm" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (unspec:V16QI + [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,xm") + (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP pack instructions that combine two vectors into a smaller vector +(define_insn "xop_pperm_pack_v2di_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (vec_concat:V4SI + (truncate:V2SI + (match_operand:V2DI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V2SI + (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v4si_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (vec_concat:V8HI + (truncate:V4HI + (match_operand:V4SI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v8hi_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (vec_concat:V16QI + (truncate:V8QI + (match_operand:V8HI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V8QI + 
(match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP packed rotate instructions +(define_expand "rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; + } +}) + +(define_expand "rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx neg = gen_reg_rtx (<MODE>mode); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_neg<mode>2 (neg, reg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); + DONE; + } +}) + +(define_insn "xop_rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" +{ + operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); + return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; +} + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_expand "vrotr<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") + (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx reg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (reg, operands[2])); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "vrotl<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") 
+ (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_vrotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (rotatert:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; XOP packed shift instructions. +;; FIXME: add V2DI back in +(define_expand "vlshr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashl<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_ashl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (ashiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +(define_insn "xop_lshl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (lshiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; SSE2 doesn't have some shift variants, so define versions for XOP +(define_expand "ashlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL 
(V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "lshlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "ashrv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + rtx ele = ((CONST_INT_P (operands[2])) + ? GEN_INT (- INTVAL (operands[2])) + : operands[2]); + + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = ele; + + emit_insn (gen_vec_initv16qi (reg, par)); + + if (!CONST_INT_P (operands[2])) + { + rtx neg = gen_reg_rtx (V16QImode); + emit_insn (gen_negv16qi2 (neg, reg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg)); + } + else + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + + DONE; +}) + +(define_expand "ashrv2di3" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V2DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (2); + rtx par = gen_rtx_PARALLEL (V2DImode, vs); + rtx reg = gen_reg_rtx (V2DImode); + rtx ele; + + if (CONST_INT_P (operands[2])) + ele = GEN_INT (- INTVAL (operands[2])); + else if (GET_MODE (operands[2]) != DImode) + { + rtx move = gen_reg_rtx (DImode); + ele = gen_reg_rtx (DImode); + convert_move (move, operands[2], false); + emit_insn (gen_negdi2 (ele, move)); + } + else + { + ele = gen_reg_rtx (DImode); + emit_insn (gen_negdi2 (ele, operands[2])); + } + + RTVEC_ELT (vs, 0) = ele; + RTVEC_ELT (vs, 1) = ele; + emit_insn (gen_vec_initv2di (reg, par)); + emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg)); + DONE; +}) + +;; XOP FRCZ support +;; parallel insns +(define_insn "xop_frcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +;; scalar insns +(define_insn "xop_vmfrcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ) + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (const_int 1)))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_frcz<mode>2256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x") + (unspec:FMA4MODEF4 + [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_maskcmp<mode>3" + [(set 
(match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_maskcmp_uns<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Version of pcom*u* that is called from the intrinsics; it allows pcomequ* +;; and pcomneu* not to be converted to the signed equivalents in case somebody +;; needs the exact instruction generated for the intrinsic. +(define_insn "xop_maskcmp_uns2<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] + UNSPEC_XOP_UNSIGNED_CMP))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Pcomtrue and pcomfalse support. These are useless instructions, but are +;; included for completeness. +(define_insn "xop_pcom_tf<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operand:SSEMODE1248 1 "register_operand" "x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_int_operand" "n")] + UNSPEC_XOP_TRUEFALSE))] + "TARGET_XOP" +{ + return ((INTVAL (operands[3]) != 0) + ? 
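/* a nonzero immediate selects the always-true form */ 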
"vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); +} + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "*avx_aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 7bc47f8f15d..ac7e21fd6f7 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -54,10 +54,6 @@ #include <smmintrin.h> #endif -#ifdef __FMA4__ -#include <fma4intrin.h> -#endif - #if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> #endif @@ -69,4 +65,16 @@ #include <mm3dnow.h> #endif +#ifdef __FMA4__ +#include <fma4intrin.h> +#endif + +#ifdef __XOP__ +#include <xopintrin.h> +#endif + +#ifdef __LWP__ +#include <lwpintrin.h> +#endif + #endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h new file mode 100644 index 00000000000..803417a6a45 --- /dev/null +++ b/gcc/config/i386/xopintrin.h @@ -0,0 +1,771 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _XOPMMINTRIN_H_INCLUDED +#define _XOPMMINTRIN_H_INCLUDED + +#ifndef __XOP__ +# error "XOP instruction set not enabled" +#else + +#include <fma4intrin.h> + +/* Integer multiply/add intructions. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +/* Packed Integer Horizontal Add and Subtract */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); +} + +extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); +} + +/* Vector conditional move and permute */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +/* Packed Integer Rotates and Shifts + Rotates - Non-Immediate form */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi64(__m128i 
__A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); +} + +/* Rotates - Immediate form */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi8(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi16(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi32(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi64(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); +} +#else +#define _mm_roti_epi8(A, N) \ + ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi16(A, N) \ + ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi32(A, N) \ + ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) +#define _mm_roti_epi64(A, N) \ + ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) +#endif + +/* Shifts */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); +} + + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); +} + +/* Compare and Predicate Generation + pcom (integer, unsigned bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) 
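/* each vpcom form encodes its predicate (lt, le, gt, ge, eq, neq, false,
   true) as an immediate byte; GCC exposes one builtin per predicate */ 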
__builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); +} + +/*pcom (integer, unsigned words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); +} + +/*pcom (integer, unsigned double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return 
(__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); +} + +/*pcom (integer, unsigned quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); +} + +/*pcom (integer, signed bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); +} + +/*pcom (integer, signed words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); +} + +/*pcom (integer, signed double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); +} + +/*pcom (integer, signed quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); +} + +/* FRCZ */ + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_pd (__m128d __A) +{ + return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); +} + +extern __inline __m128 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_frcz_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B); +} + +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_ps (__m256 __A) +{ + return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); +} + +extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_pd (__m256d __A) +{ + return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* _XOPMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h index c0914d3be02..78e3115c291 100644 --- a/gcc/config/m32c/m32c.h +++ b/gcc/config/m32c/m32c.h @@ -560,7 +560,6 @@ typedef struct m32c_cumulative_args #define HAVE_PRE_DECREMENT 1 #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) #define MAX_REGS_PER_ADDRESS 1 /* This is passed to the macros below, so that they can be implemented diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h index ee0f9f67fca..278ba15c4fa 100644 --- a/gcc/config/m68hc11/m68hc11.h +++ b/gcc/config/m68hc11/m68hc11.h @@ -1108,9 +1108,6 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER]; && (GET_CODE (XEXP (operand, 0)) == POST_INC) \ && (SP_REG_P (XEXP (XEXP (operand, 0), 0)))) -/* 1 if X is an rtx for a constant that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Maximum number of registers that can appear in a valid memory address */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c index 0862936b1b4..8db98fc4f46 100644 --- a/gcc/config/m68k/m68k.c +++ b/gcc/config/m68k/m68k.c @@ -1399,6 +1399,30 @@ flags_in_68881 (void) return cc_status.flags & CC_IN_68881; } +/* Return true if PARALLEL contains register REGNO. */ +static bool +m68k_reg_present_p (const_rtx parallel, unsigned int regno) +{ + int i; + + if (REG_P (parallel) && REGNO (parallel) == regno) + return true; + + if (GET_CODE (parallel) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (parallel, 0); ++i) + { + const_rtx x; + + x = XEXP (XVECEXP (parallel, 0, i), 0); + if (REG_P (x) && REGNO (x) == regno) + return true; + } + + return false; +} + /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */ static bool @@ -1411,6 +1435,26 @@ m68k_ok_for_sibcall_p (tree decl, tree exp) if (CALL_EXPR_STATIC_CHAIN (exp)) return false; + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a D0 register but then need to transfer it to an A0 register. */ + rtx cfun_value; + rtx call_value; + + cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl); + call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl); + + /* Check that the values are equal or that the result the callee + function returns is a superset of what the current function returns. 
*/ + if (!(rtx_equal_p (cfun_value, call_value) + || (REG_P (cfun_value) + && m68k_reg_present_p (call_value, REGNO (cfun_value))))) + return false; + } + kind = m68k_get_function_kind (current_function_decl); if (kind == m68k_fk_normal_function) /* We can always sibcall from a normal function, because it's @@ -5188,6 +5232,9 @@ m68k_libcall_value (enum machine_mode mode) return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG); } +/* Location in which function value is returned. + NOTE: Due to differences in ABIs, don't call this function directly, + use FUNCTION_VALUE instead. */ rtx m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED) { diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h index 8b00a444ce2..9d286e33b94 100644 --- a/gcc/config/mep/mep.h +++ b/gcc/config/mep/mep.h @@ -567,8 +567,6 @@ typedef struct #define TRAMPOLINE_SIZE 20 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/mips/iris.h b/gcc/config/mips/iris.h index fce82174e66..373691ee6e1 100644 --- a/gcc/config/mips/iris.h +++ b/gcc/config/mips/iris.h @@ -63,9 +63,6 @@ along with GCC; see the file COPYING3. If not see #undef ASM_FINISH_DECLARE_OBJECT #define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object -/* The linker needs a space after "-o". */ -#define SWITCHES_NEED_SPACES "o" - /* Specify wchar_t types. */ #undef WCHAR_TYPE #define WCHAR_TYPE (Pmode == DImode ? "int" : "long int") diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 5005bf7f0fb..76fc37bd479 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -1930,7 +1930,7 @@ (set (match_dup 0) (match_dup 5)) (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI)) - ;; Zero-extend OP4. + ;; Zero-extend OP0. (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) @@ -1938,7 +1938,7 @@ (lshiftrt:DI (match_dup 0) (const_int 32))) - ;; Shift OP0 into place. + ;; Shift OP4 into place. (set (match_dup 4) (ashift:DI (match_dup 4) (const_int 32))) diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h index bdbc948ac12..c732aa07180 100644 --- a/gcc/config/mn10300/mn10300.h +++ b/gcc/config/mn10300/mn10300.h @@ -600,10 +600,6 @@ struct cum_arg {int nbytes; }; ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \ : (rtx) 0) -/* 1 if X is an rtx for a constant that is a valid address. */ - -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && GET_CODE (X) != CONST_DOUBLE) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h index f1b77eaf0c2..384bce4a986 100644 --- a/gcc/config/moxie/moxie.h +++ b/gcc/config/moxie/moxie.h @@ -475,10 +475,6 @@ enum reg_class an immediate operand on the target machine. */ #define LEGITIMATE_CONSTANT_P(X) 1 -/* A C expression that is 1 if the RTX X is a constant which is a - valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - /* A number, the maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h index 8997612ba5a..fe8c9e8aed3 100644 --- a/gcc/config/pdp11/pdp11.h +++ b/gcc/config/pdp11/pdp11.h @@ -594,10 +594,6 @@ extern int may_call_alloca; #define MAX_REGS_PER_ADDRESS 1 -/* Recognize any constant value that is a valid address. 
*/ - -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Nonzero if the constant value X is a legitimate general operand. It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h index 44559f22333..4d0c96278e0 100644 --- a/gcc/config/picochip/picochip.h +++ b/gcc/config/picochip/picochip.h @@ -471,8 +471,6 @@ extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER]; /* Addressing Modes */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - #define MAX_REGS_PER_ADDRESS 1 /* Legitimize reload address tries machine dependent means of diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md index f15b586afb5..52bf7df3621 100644 --- a/gcc/config/rx/constraints.md +++ b/gcc/config/rx/constraints.md @@ -55,7 +55,7 @@ ;; This constraint is used by the SUBSI3 pattern because the ;; RX SUB instruction can only take a 4-bit unsigned integer -;; value. +;; value. Also used by the MVTIPL instruction. (define_constraint "Uint04" "@internal An unsigned 4-bit immediate value" (and (match_code "const_int") diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md index 75cf8ebaed8..d7a363ebb88 100644 --- a/gcc/config/rx/predicates.md +++ b/gcc/config/rx/predicates.md @@ -117,16 +117,22 @@ /* Check that the next element is the first push. */ element = XVECEXP (op, 0, 1); if ( ! SET_P (element) + || ! REG_P (SET_SRC (element)) + || GET_MODE (SET_SRC (element)) != SImode || ! MEM_P (SET_DEST (element)) - || ! REG_P (XEXP (SET_DEST (element), 0)) - || REGNO (XEXP (SET_DEST (element), 0)) != SP_REG - || ! REG_P (SET_SRC (element))) + || GET_MODE (SET_DEST (element)) != SImode + || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS + || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0)) + || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG + || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) + != GET_MODE_SIZE (SImode)) return false; src_regno = REGNO (SET_SRC (element)); /* Check that the remaining elements use SP-<disp> - addressing and incremental register numbers. */ + addressing and decreasing register numbers. */ for (i = 2; i < count; i++) { element = XVECEXP (op, 0, i); @@ -134,7 +140,7 @@ if ( ! SET_P (element) || ! REG_P (SET_SRC (element)) || GET_MODE (SET_SRC (element)) != SImode - || REGNO (SET_SRC (element)) != src_regno + (i - 1) + || REGNO (SET_SRC (element)) != src_regno - (i - 1) || ! MEM_P (SET_DEST (element)) || GET_MODE (SET_DEST (element)) != SImode || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS @@ -142,7 +148,7 @@ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) - != (i - 1) * GET_MODE_SIZE (SImode)) + != i * GET_MODE_SIZE (SImode)) return false; } return true; diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index cf2b098e83c..885f52581de 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -51,6 +51,8 @@ #include "target-def.h" #include "langhooks.h" +enum rx_cpu_types rx_cpu_type = RX600; + /* Return true if OP is a reference to an object in a small data area. */ static bool @@ -249,7 +251,6 @@ rx_is_mode_dependent_addr (rtx addr) } } - /* A C compound statement to output to stdio stream FILE the assembler syntax for an instruction operand that is a memory reference whose address is ADDR. 
*/ @@ -445,8 +446,13 @@ rx_print_operand (FILE * file, rtx op, int letter) fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]); else if (CONST_INT_P (op)) { + HOST_WIDE_INT v = INTVAL (op); + fprintf (file, "#"); - rx_print_integer (file, INTVAL (op) >> 32); + /* Trickery to avoid problems with shifting 32 bits at a time. */ + v = v >> 16; + v = v >> 16; + rx_print_integer (file, v); } else { @@ -840,22 +846,20 @@ has_func_attr (const_tree decl, const char * func_attr) return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE; } -/* Returns true if the provided function has - the "[fast_]interrupt" attribute. */ +/* Returns true if the provided function has the "fast_interrupt" attribute. */ static inline bool is_fast_interrupt_func (const_tree decl) { - return has_func_attr (decl, "interrupt") - || has_func_attr (decl, "fast_interrupt") ; + return has_func_attr (decl, "fast_interrupt"); } -/* Returns true if the provided function has the "exception" attribute. */ +/* Returns true if the provided function has the "interrupt" attribute. */ static inline bool -is_exception_func (const_tree decl) +is_interrupt_func (const_tree decl) { - return has_func_attr (decl, "exception"); + return has_func_attr (decl, "interrupt"); } /* Returns true if the provided function has the "naked" attribute. */ @@ -945,8 +949,8 @@ rx_set_current_function (tree fndecl) { /* Remember the last target of rx_set_current_function. */ static tree rx_previous_fndecl; - bool prev_was_interrupt; - bool current_is_interrupt; + bool prev_was_fast_interrupt; + bool current_is_fast_interrupt; /* Only change the context if the function changes. This hook is called several times in the course of compiling a function, and we don't want @@ -954,18 +958,19 @@ rx_set_current_function (tree fndecl) if (fndecl == rx_previous_fndecl) return; - prev_was_interrupt + prev_was_fast_interrupt = rx_previous_fndecl ? is_fast_interrupt_func (rx_previous_fndecl) : false; - current_is_interrupt + + current_is_fast_interrupt = fndecl ? is_fast_interrupt_func (fndecl) : false; - if (prev_was_interrupt != current_is_interrupt) + if (prev_was_fast_interrupt != current_is_fast_interrupt) { - use_fixed_regs = current_is_interrupt; + use_fixed_regs = current_is_fast_interrupt; target_reinit (); } - + rx_previous_fndecl = fndecl; } @@ -1057,8 +1062,8 @@ rx_get_stack_layout (unsigned int * lowest, if (df_regs_ever_live_p (reg) && (! call_used_regs[reg] /* Even call clobbered registers must - be pushed inside exception handlers. */ - || is_exception_func (NULL_TREE))) + be pushed inside interrupt handlers. */ + || is_interrupt_func (NULL_TREE))) { if (low == 0) low = reg; @@ -1142,9 +1147,8 @@ rx_emit_stack_pushm (rtx * operands) gcc_assert (REG_P (first_push)); asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n", - reg_names [REGNO (first_push)], - reg_names [REGNO (first_push) + last_reg]); - + reg_names [REGNO (first_push) - last_reg], + reg_names [REGNO (first_push)]); } /* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */ @@ -1167,14 +1171,30 @@ gen_rx_store_vector (unsigned int low, unsigned int high) XVECEXP (vector, 0, i + 1) = gen_rtx_SET (SImode, gen_rtx_MEM (SImode, - i == 0 ? stack_pointer_rtx - : gen_rtx_MINUS (SImode, stack_pointer_rtx, - GEN_INT (i * UNITS_PER_WORD))), - gen_rtx_REG (SImode, low + i)); - + gen_rtx_MINUS (SImode, stack_pointer_rtx, + GEN_INT ((i + 1) * UNITS_PER_WORD))), + gen_rtx_REG (SImode, high - i)); return vector; } +/* Mark INSN as being frame related. 
If it is a PARALLEL + then mark each element as being frame related as well. */ + +static void +mark_frame_related (rtx insn) +{ + RTX_FRAME_RELATED_P (insn) = 1; + insn = PATTERN (insn); + + if (GET_CODE (insn) == PARALLEL) + { + unsigned int i; + + for (i = 0; i < XVECLEN (insn, 0); i++) + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1; + } +} + void rx_expand_prologue (void) { @@ -1183,6 +1203,7 @@ unsigned int mask; unsigned int low; unsigned int high; + unsigned int reg; rtx insn; /* Naked functions use their own, programmer provided prologues. */ @@ -1196,14 +1217,12 @@ /* If we use any of the callee-saved registers, save them now. */ if (mask) { - unsigned int reg; - /* Push registers in reverse order. */ for (reg = FIRST_PSEUDO_REGISTER; reg --;) if (mask & (1 << reg)) { insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); } } else if (low) @@ -1214,7 +1233,57 @@ insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD), gen_rx_store_vector (low, high))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); + } + + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) + { + unsigned int acc_high, acc_low; + + /* Interrupt handlers have to preserve the accumulator + register if so requested by the user. Use the first + two pushed registers as intermediaries. */ + if (mask) + { + acc_low = acc_high = 0; + + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high != 0); + + /* Note - the bottom 16 bits of the accumulator are inaccessible. + We just assume that they are zero. */ + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high))); + } + else + { + acc_low = low; + acc_high = low + 1; + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high <= high); + + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_store_vector (acc_low, acc_high))); + } + + frame_size += 2 * UNITS_PER_WORD; } /* If needed, set up the frame pointer. */ @@ -1270,8 +1339,8 @@ rx_output_function_prologue (FILE * file, if (is_fast_interrupt_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n"); - if (is_exception_func (NULL_TREE)) - asm_fprintf (file, "\t; Note: Exception Handler\n"); + if (is_interrupt_func (NULL_TREE)) + asm_fprintf (file, "\t; Note: Interrupt Handler\n"); if (is_naked_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Naked Function\n"); @@ -1382,6 +1451,7 @@ rx_expand_epilogue (bool is_sibcall) unsigned int stack_size; unsigned int register_mask; unsigned int regs_size; + unsigned int reg; unsigned HOST_WIDE_INT total_size; if (is_naked_func (NULL_TREE)) @@ -1407,14 +1477,14 @@ their caller. Instead they branch to their sibling and allow their return instruction to return to this function's parent. 
- - Fast interrupt and exception handling functions have to use special + - Fast and normal interrupt handling functions have to use special return instructions. - Functions where we have pushed a fragmented set of registers into the call-save area must have the same set of registers popped. */ if (is_sibcall || is_fast_interrupt_func (NULL_TREE) - || is_exception_func (NULL_TREE) + || is_interrupt_func (NULL_TREE) || register_mask) { /* Cannot use the special instructions - deconstruct by hand. */ @@ -1422,10 +1492,47 @@ rx_expand_epilogue (bool is_sibcall) emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (total_size))); - if (register_mask) + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) { - unsigned int reg; + unsigned int acc_low, acc_high; + + /* Reverse the saving of the accumulator register onto the stack. + Note we must adjust the saved "low" accumulator value as it + is really the middle 32-bits of the accumulator. */ + if (register_mask) + { + acc_low = acc_high = 0; + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (register_mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low))); + } + else + { + acc_low = low; + acc_high = low + 1; + emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_popm_vector (acc_low, acc_high))); + } + + emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low), + gen_rtx_REG (SImode, acc_low), + GEN_INT (16))); + emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high))); + } + if (register_mask) + { for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg ++) if (register_mask & (1 << reg)) emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg))); @@ -1441,7 +1548,7 @@ rx_expand_epilogue (bool is_sibcall) if (is_fast_interrupt_func (NULL_TREE)) emit_jump_insn (gen_fast_interrupt_return ()); - else if (is_exception_func (NULL_TREE)) + else if (is_interrupt_func (NULL_TREE)) emit_jump_insn (gen_exception_return ()); else if (! is_sibcall) emit_jump_insn (gen_simple_return ()); @@ -1670,6 +1777,7 @@ enum rx_builtin RX_BUILTIN_MVTACHI, RX_BUILTIN_MVTACLO, RX_BUILTIN_MVTC, + RX_BUILTIN_MVTIPL, RX_BUILTIN_RACW, RX_BUILTIN_REVW, RX_BUILTIN_RMPA, @@ -1725,6 +1833,7 @@ rx_init_builtins (void) ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void); ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer); ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer); + ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer); ADD_RX_BUILTIN1 (RACW, "racw", void, integer); ADD_RX_BUILTIN1 (ROUND, "round", intSI, float); ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI); @@ -1733,20 +1842,6 @@ rx_init_builtins (void) } static rtx -rx_expand_builtin_stz (rtx arg, rtx target, rtx (* gen_func)(rtx, rtx)) -{ - if (! CONST_INT_P (arg)) - return NULL_RTX; - - if (target == NULL_RTX || ! REG_P (target)) - target = gen_reg_rtx (SImode); - - emit_insn (gen_func (target, arg)); - - return target; -} - -static rtx rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg) { if (reg && ! REG_P (arg)) @@ -1791,6 +1886,21 @@ rx_expand_builtin_mvfc (tree t_arg, rtx target) } static rtx +rx_expand_builtin_mvtipl (rtx arg) +{ + /* The RX610 does not support the MVTIPL instruction. */ + if (rx_cpu_type == RX610) + return NULL_RTX; + + if (! CONST_INT_P (arg) || ! 
IN_RANGE (arg, 0, (1 << 4) - 1)) + return NULL_RTX; + + emit_insn (gen_mvtipl (arg)); + + return NULL_RTX; +} + +static rtx rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx)) { rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); @@ -1887,6 +1997,7 @@ rx_expand_builtin (tree exp, case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX; case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target); case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp); + case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op); case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg (op, gen_racw, false); case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target); @@ -1945,7 +2056,7 @@ rx_elf_asm_destructor (rtx symbol, int priority) rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false); } -/* Check "interrupt", "exception" and "naked" attributes. */ +/* Check "fast_interrupt", "interrupt" and "naked" attributes. */ static tree rx_handle_func_attribute (tree * node, @@ -1975,9 +2086,8 @@ rx_handle_func_attribute (tree * node, const struct attribute_spec rx_attribute_table[] = { /* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */ - { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, - { "exception", 0, 0, true, false, false, rx_handle_func_attribute }, + { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "naked", 0, 0, true, false, false, rx_handle_func_attribute }, { NULL, 0, 0, false, false, false, NULL } }; @@ -1993,7 +2103,7 @@ static bool rx_func_attr_inlinable (const_tree decl) { return ! is_fast_interrupt_func (decl) - && ! is_exception_func (decl) + && ! is_interrupt_func (decl) && ! is_naked_func (decl); } @@ -2115,6 +2225,20 @@ rx_is_legitimate_constant (rtx x) ( 1 << (rx_max_constant_size * 8))); } +/* This is a tri-state variable. The default value of 0 means that the user + has specified neither -mfpu nor -mnofpu on the command line. In this case + the selection of RX FPU instructions is entirely based upon the size of + the floating point object and whether unsafe math optimizations were + enabled. If 32-bit doubles have been enabled then both floats and doubles + can make use of FPU instructions, otherwise only floats may do so. + + If the value is 1 then the user has specified -mfpu and the FPU + instructions should be used. Unsafe math optimizations will automatically + be enabled and doubles set to 32 bits. If the value is -1 then -mnofpu + has been specified and FPU instructions will not be used, even if unsafe + math optimizations have been enabled. */ +int rx_enable_fpu = 0; + /* Extra processing for target specific command line options. */ static bool @@ -2122,6 +2246,27 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) { switch (code) { + /* -mfpu enables the use of RX FPU instructions. This implies the use + of 32-bit doubles and also the enabling of fast math optimizations + (since the RX FPU instructions are not IEEE compliant). The -mnofpu + option disables the use of RX FPU instructions, but does not + place any constraints on the size of doubles or the use of fast math + optimizations. + + The selection of 32-bit vs 64-bit doubles is handled by the setting + of the 32BIT_DOUBLES mask in the rx.opt file. 
Enabling fast math + optimizations is performed in OVERRIDE_OPTIONS since if it was done + here it could be overridden by a -fno-fast-math option specified + *earlier* on the command line. (Target specific options are + processed before generic ones). */ + case OPT_fpu: + rx_enable_fpu = 1; + break; + + case OPT_nofpu: + rx_enable_fpu = -1; + break; + case OPT_mint_register_: switch (value) { @@ -2145,12 +2290,21 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) break; case OPT_mmax_constant_size_: - /* Make sure that the the -mmax-constant_size option is in range. */ + /* Make sure that the -mmax-constant_size option is in range. */ return IN_RANGE (value, 0, 4); + case OPT_mcpu_: + case OPT_patch_: + if (strcasecmp (arg, "RX610") == 0) + rx_cpu_type = RX610; + /* FIXME: Should we check for non-RX cpu names here ? */ + break; + default: - return true; + break; } + + return true; } static int diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index a01e194910b..bb7cf7f1e3e 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -24,18 +24,24 @@ { \ builtin_define ("__RX__"); \ builtin_assert ("cpu=RX"); \ - builtin_assert ("machine=RX"); \ + if (rx_cpu_type == RX610) \ + builtin_assert ("machine=RX610"); \ + else \ + builtin_assert ("machine=RX600"); \ \ if (TARGET_BIG_ENDIAN_DATA) \ builtin_define ("__RX_BIG_ENDIAN__"); \ else \ builtin_define ("__RX_LITTLE_ENDIAN__");\ \ - if (TARGET_64BIT_DOUBLES) \ - builtin_define ("__RX_64BIT_DOUBLES__");\ - else \ + if (TARGET_32BIT_DOUBLES) \ builtin_define ("__RX_32BIT_DOUBLES__");\ + else \ + builtin_define ("__RX_64BIT_DOUBLES__");\ \ + if (ALLOW_RX_FPU_INSNS) \ + builtin_define ("__RX_FPU_INSNS__"); \ + \ if (TARGET_AS100_SYNTAX) \ builtin_define ("__RX_AS100_SYNTAX__"); \ else \ @@ -43,6 +49,17 @@ } \ while (0) +enum rx_cpu_types +{ + RX600, + RX610 +}; + +extern enum rx_cpu_types rx_cpu_type; + +#undef CC1_SPEC +#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" + #undef STARTFILE_SPEC #define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s" @@ -52,7 +69,8 @@ #undef ASM_SPEC #define ASM_SPEC "\ %{mbig-endian-data:-mbig-endian-data} \ -%{m64bit-doubles:-m64bit-doubles} \ +%{m32bit-doubles:-m32bit-doubles} \ +%{!m32bit-doubles:-m64bit-doubles} \ %{msmall-data-limit*:-msmall-data-limit} \ %{mrelax:-relax} \ " @@ -88,16 +106,17 @@ #define LONG_LONG_TYPE_SIZE 64 #define FLOAT_TYPE_SIZE 32 -#define DOUBLE_TYPE_SIZE (TARGET_64BIT_DOUBLES ? 64 : 32) +#define DOUBLE_TYPE_SIZE (TARGET_32BIT_DOUBLES ? 32 : 64) #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE -#ifdef __RX_64BIT_DOUBLES__ -#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_HAS_DF_MODE 1 -#else +#ifdef __RX_32BIT_DOUBLES__ +#define LIBGCC2_HAS_DF_MODE 0 #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32 #define LIBGCC2_DOUBLE_TYPE_SIZE 32 +#else +#define LIBGCC2_HAS_DF_MODE 1 +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#define LIBGCC2_DOUBLE_TYPE_SIZE 64 #endif #define DEFAULT_SIGNED_CHAR 0 @@ -591,7 +610,6 @@ typedef unsigned int CUMULATIVE_ARGS; #define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ rx_print_operand_address (FILE, ADDR) - #define CC_NO_CARRY 0400 #define NOTICE_UPDATE_CC(EXP, INSN) rx_notice_update_cc (EXP, INSN) @@ -614,19 +632,28 @@ extern int rx_float_compare_mode; #define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \ ? 
DBX_DEBUG : DWARF2_DEBUG) -#undef CC1_SPEC -#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" +#define INCOMING_FRAME_SP_OFFSET 4 +#define ARG_POINTER_CFA_OFFSET(FNDECL) 4 +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4 + +extern int rx_enable_fpu; /* For some unknown reason LTO compression is not working, at least on my local system. So set the default compression - level to none, for now. */ + level to none, for now. + + For an explanation of rx_enable_fpu see rx_handle_option(). */ #define OVERRIDE_OPTIONS \ do \ { \ if (flag_lto_compression_level == -1) \ flag_lto_compression_level = 0; \ + \ + if (rx_enable_fpu == 1) \ + set_fast_math_flags (true); \ } \ while (0) /* This macro is used to decide when RX FPU instructions can be used. */ -#define ALLOW_RX_FPU_INSNS flag_unsafe_math_optimizations +#define ALLOW_RX_FPU_INSNS ((rx_enable_fpu != -1) \ + && flag_unsafe_math_optimizations) diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 165da4f41a1..360f6235558 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -27,8 +27,8 @@ ;; This code iterator is used for sign- and zero- extensions. (define_mode_iterator small_int_modes [(HI "") (QI "")]) -;; We do not handle DFmode here because by default it is -;; the same as SFmode, and if -m64bit-doubles is active +;; We do not handle DFmode here because it is either +;; the same as SFmode, or if -m64bit-doubles is active ;; then all operations on doubles have to be handled by ;; library functions. (define_mode_iterator register_modes @@ -75,15 +75,14 @@ (UNSPEC_BUILTIN_MVTACHI 41) (UNSPEC_BUILTIN_MVTACLO 42) (UNSPEC_BUILTIN_MVTC 43) - (UNSPEC_BUILTIN_MVTCP 44) - (UNSPEC_BUILTIN_OPEPC 45) - (UNSPEC_BUILTIN_RACW 46) - (UNSPEC_BUILTIN_REVW 47) - (UNSPEC_BUILTIN_RMPA 48) - (UNSPEC_BUILTIN_ROUND 49) - (UNSPEC_BUILTIN_SAT 50) - (UNSPEC_BUILTIN_SETPSW 51) - (UNSPEC_BUILTIN_WAIT 52) + (UNSPEC_BUILTIN_MVTIPL 44) + (UNSPEC_BUILTIN_RACW 45) + (UNSPEC_BUILTIN_REVW 46) + (UNSPEC_BUILTIN_RMPA 47) + (UNSPEC_BUILTIN_ROUND 48) + (UNSPEC_BUILTIN_SAT 49) + (UNSPEC_BUILTIN_SETPSW 50) + (UNSPEC_BUILTIN_WAIT 51) ] ) @@ -1002,10 +1001,8 @@ (set_attr "timings" "11,11,11,11,11,33") (set_attr "length" "3,4,5,6,7,6")] ) - + ;; Floating Point Instructions -;; These patterns are only enabled with -ffast-math because the RX FPU -;; cannot handle sub-normal values. (define_insn "addsf3" [(set (match_operand:SF 0 "register_operand" "=r,r,r") @@ -1298,7 +1295,6 @@ [(set_attr "length" "3,6") (set_attr "timings" "22")] ) - ;; Block move functions. 
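
To make the -mfpu/-mnofpu machinery introduced above easier to follow, here is a minimal C sketch of the gating logic, written for illustration only. The helper name allow_rx_fpu_insns_p is invented; in the patch itself the test lives in the ALLOW_RX_FPU_INSNS macro in rx.h, and the tri-state rx_enable_fpu is set in rx_handle_option in rx.c.

/* Sketch of the -mfpu/-mnofpu decision logic (illustrative only).
   enable_fpu: 1 if -mfpu was given (which also enables fast math via
   OVERRIDE_OPTIONS), -1 if -mnofpu was given, 0 if neither.  */
static int
allow_rx_fpu_insns_p (int enable_fpu, int unsafe_math)
{
  /* -mnofpu wins outright; otherwise FPU instructions additionally
     require unsafe math optimizations, because the RX FPU is not
     IEEE compliant.  */
  return enable_fpu != -1 && unsafe_math;
}
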
@@ -1580,8 +1576,8 @@ ;; Move to Accumulator (high) (define_insn "mvtachi" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACHI)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACHI)] "" "mvtachi\t%0" [(set_attr "length" "3")] @@ -1589,8 +1585,8 @@ ;; Move to Accumulator (low) (define_insn "mvtaclo" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACLO)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACLO)] "" "mvtaclo\t%0" [(set_attr "length" "3")] @@ -1598,8 +1594,8 @@ ;; Round Accumulator (define_insn "racw" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")] - UNSPEC_BUILTIN_RACW)] + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] + UNSPEC_BUILTIN_RACW)] "" "racw\t%0" [(set_attr "length" "3")] @@ -1679,7 +1675,7 @@ ;; Move from control register (define_insn "mvfc" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_BUILTIN_MVFC))] "" @@ -1691,13 +1687,24 @@ (define_insn "mvtc" [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") (match_operand:SI 1 "nonmemory_operand" "r,i")] - UNSPEC_BUILTIN_MVTC) - (clobber (cc0))] + UNSPEC_BUILTIN_MVTC)] "" "mvtc\t%1, %C0" - [(set_attr "length" "3,7") - (set_attr "cc" "clobber")] ;; Just in case the control - ;; register selected is the psw. + [(set_attr "length" "3,7")] + ;; Ignore possible clobbering of the comparison flags in the + ;; PSW register. This is a cc0 target so any cc0 setting + ;; instruction will always be paired with a cc0 user, without + ;; the possibility of this instruction being placed in between + ;; them. +) + +;; Move to interrupt priority level +(define_insn "mvtipl" + [(unspec:SI [(match_operand:SI 0 "immediate_operand" "Uint04")] + UNSPEC_BUILTIN_MVTIPL)] + "" + "mvtipl\t%0" + [(set_attr "length" "3")] ) ;;---------- Interrupts ------------------------ @@ -1748,27 +1755,6 @@ [(set_attr "length" "5")] ) -;; Move to co-processor register -(define_insn "mvtcp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") - (match_operand:SI 1 "nonmemory_operand" "i,r") - (match_operand:SI 2 "immediate_operand" "i,i")] - UNSPEC_BUILTIN_MVTCP)] - "" - "; mvtcp\t%0, %1, %2" - [(set_attr "length" "7,5")] -) - -;; Co-processor operation -(define_insn "opecp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i") - (match_operand:SI 1 "immediate_operand" "i")] - UNSPEC_BUILTIN_OPEPC)] - "" - "; opecp\t%0, %1" - [(set_attr "length" "5")] -) - ;;---------- Misc ------------------------ ;; Required by cfglayout.c... diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt index 83e75bfba76..768d565b478 100644 --- a/gcc/config/rx/rx.opt +++ b/gcc/config/rx/rx.opt @@ -19,13 +19,31 @@ ; <http://www.gnu.org/licenses/>. ;--------------------------------------------------- +m32bit-doubles +Target RejectNegative Mask(32BIT_DOUBLES) +Stores doubles in 32 bits. + m64bit-doubles -Target RejectNegative Mask(64BIT_DOUBLES) -Store doubles in 64 bits. +Target RejectNegative InverseMask(32BIT_DOUBLES) +Store doubles in 64 bits. This is the default. -m32bit-doubles -Target RejectNegative InverseMask(64BIT_DOUBLES) -Stores doubles in 32 bits. This is the default. +fpu +Target RejectNegative Mask(32BIT_DOUBLES) MaskExists +Enable the use of RX FPU instructions. 
+ +nofpu +Target RejectNegative InverseMask(32BIT_DOUBLES) MaskExists +Disable the use of RX FPU instructions. + +;--------------------------------------------------- + +mcpu= +Target RejectNegative Joined Var(rx_cpu_name) +Specify the target RX cpu type. + +patch= +Target RejectNegative Joined Var(rx_cpu_name) +Alias for -mcpu. ;--------------------------------------------------- @@ -72,3 +90,9 @@ Maximum size in bytes of constant values allowed as operands. mint-register= Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0) Specifies the number of registers to reserve for interrupt handlers. + +;--------------------------------------------------- + +msave-acc-in-interrupts +Target Mask(SAVE_ACC_REGISTER) +Specifies whether interrupt functions should save and restore the accumulator register. diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx index 39cda72af57..eb1ca48d3a3 100644 --- a/gcc/config/rx/t-rx +++ b/gcc/config/rx/t-rx @@ -20,9 +20,9 @@ # Enable multilibs: -MULTILIB_OPTIONS = m64bit-doubles mbig-endian-data -MULTILIB_DIRNAMES = 64fp big-endian-data -MULTILIB_MATCHES = m64bit-doubles=mieee +MULTILIB_OPTIONS = m32bit-doubles mbig-endian-data +MULTILIB_DIRNAMES = 32fp big-endian-data +MULTILIB_MATCHES = m32bit-doubles=fpu MULTILIB_EXCEPTIONS = MULTILIB_EXTRA_OPTS = diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h index 0b7af7b2739..cde9c222546 100644 --- a/gcc/config/score/score.h +++ b/gcc/config/score/score.h @@ -688,9 +688,6 @@ typedef struct score_args #define HAVE_PRE_MODIFY_REG 0 #define HAVE_POST_MODIFY_REG 0 -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h index 682f7e6f466..fa97e8becdc 100644 --- a/gcc/config/stormy16/stormy16.h +++ b/gcc/config/stormy16/stormy16.h @@ -522,8 +522,6 @@ enum reg_class #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h index 1087069adbb..dccbe9cc8ee 100644 --- a/gcc/config/vax/linux.h +++ b/gcc/config/vax/linux.h @@ -21,17 +21,7 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)"); -#define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ - LINUX_TARGET_OS_CPP_BUILTINS(); \ - if (flag_pic) \ - { \ - builtin_define ("__PIC__"); \ - builtin_define ("__pic__"); \ - } \ - } \ - while (0) +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() /* We use GAS, G-float double and want new DI patterns. */ #undef TARGET_DEFAULT |
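
A note on the "trickery" comment in rx_print_operand above: shifting right by 32 in a single step is undefined behaviour in C whenever the operand type is only 32 bits wide, which is presumably why the patch splits the shift into two 16-bit steps whose shift counts are always in range. A standalone illustration (not code from the patch):

#include <stdio.h>

static long
shift_down_32 (long v)          /* `long` may be 32 or 64 bits wide */
{
  /* Two 16-bit shifts have the combined effect of one 32-bit shift,
     but each individual shift count is valid for any width >= 32.  */
  v = v >> 16;
  v = v >> 16;
  return v;
}

int
main (void)
{
  printf ("%ld\n", shift_down_32 (0x12345678L));   /* prints 0 */
  return 0;
}
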
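
Finally, the RX attribute rework above (rx_attribute_table now accepts "fast_interrupt", "interrupt" and "naked") changes how handler functions are declared. A hedged usage sketch follows; the handler names and bodies are invented, but the attribute spellings and the behaviour described in the comments come from the rx.c changes in this patch:

/* A normal interrupt handler: the epilogue returns through
   gen_exception_return, call-clobbered registers are preserved, and
   with -msave-acc-in-interrupts the accumulator is saved and restored
   via the mvfachi/mvfacmi and mvtachi/mvtaclo sequences added here.  */
void __attribute__((interrupt)) timer_tick_handler (void)
{
  /* ... handler body ... */
}

/* A fast interrupt handler: returns through gen_fast_interrupt_return
   and switches to fixed registers while it is the current function
   (see rx_set_current_function).  */
void __attribute__((fast_interrupt)) dma_done_handler (void)
{
  /* ... handler body ... */
}
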