author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-06 22:51:05 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2009-11-06 22:51:05 +0000
commit     e1647522f93999450cc558341bb2066ca26e070f (patch)
tree       ec9704394836b7bb5123d7d8c1d9647eace77c5d /gcc/config
parent     035ef3e66f39f67a3fab95825e0fbc750bc8160d (diff)
download   gcc-e1647522f93999450cc558341bb2066ca26e070f.tar.gz

2009-11-06  Basile Starynkevitch  <basile@starynkevitch.net>

	MELT branch merged with trunk rev 153975

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@153981 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/alpha/osf.h | 4
-rw-r--r--  gcc/config/arm/arm.c | 189
-rw-r--r--  gcc/config/arm/arm.h | 73
-rw-r--r--  gcc/config/arm/arm.md | 42
-rw-r--r--  gcc/config/arm/arm_neon.h | 100
-rw-r--r--  gcc/config/arm/bpabi.h | 2
-rw-r--r--  gcc/config/arm/fpa.md | 4
-rw-r--r--  gcc/config/arm/linux-elf.h | 2
-rw-r--r--  gcc/config/arm/neon-gen.ml | 3
-rw-r--r--  gcc/config/arm/neon.ml | 3
-rw-r--r--  gcc/config/arm/netbsd-elf.h | 2
-rw-r--r--  gcc/config/arm/vxworks.h | 2
-rw-r--r--  gcc/config/avr/avr.h | 2
-rw-r--r--  gcc/config/bfin/bfin.h | 3
-rw-r--r--  gcc/config/cris/cris.h | 2
-rw-r--r--  gcc/config/fr30/fr30.h | 10
-rw-r--r--  gcc/config/frv/frv.h | 10
-rw-r--r--  gcc/config/i386/cpuid.h | 4
-rw-r--r--  gcc/config/i386/fma4intrin.h | 9
-rw-r--r--  gcc/config/i386/i386-c.c | 4
-rw-r--r--  gcc/config/i386/i386.c | 865
-rw-r--r--  gcc/config/i386/i386.h | 2
-rw-r--r--  gcc/config/i386/i386.md | 228
-rw-r--r--  gcc/config/i386/i386.opt | 8
-rw-r--r--  gcc/config/i386/ia32intrin.h | 2
-rw-r--r--  gcc/config/i386/linux.h | 3
-rw-r--r--  gcc/config/i386/linux64.h | 3
-rw-r--r--  gcc/config/i386/lwpintrin.h | 109
-rw-r--r--  gcc/config/i386/sse.md | 1541
-rw-r--r--  gcc/config/i386/x86intrin.h | 16
-rw-r--r--  gcc/config/i386/xopintrin.h | 771
-rw-r--r--  gcc/config/m32c/m32c.h | 1
-rw-r--r--  gcc/config/m68hc11/m68hc11.h | 3
-rw-r--r--  gcc/config/m68k/m68k.c | 47
-rw-r--r--  gcc/config/mep/mep.h | 2
-rw-r--r--  gcc/config/mips/iris.h | 3
-rw-r--r--  gcc/config/mips/mips.md | 4
-rw-r--r--  gcc/config/mn10300/mn10300.h | 4
-rw-r--r--  gcc/config/moxie/moxie.h | 4
-rw-r--r--  gcc/config/pdp11/pdp11.h | 4
-rw-r--r--  gcc/config/picochip/picochip.h | 2
-rw-r--r--  gcc/config/rx/constraints.md | 2
-rw-r--r--  gcc/config/rx/predicates.md | 18
-rw-r--r--  gcc/config/rx/rx.c | 268
-rw-r--r--  gcc/config/rx/rx.h | 59
-rw-r--r--  gcc/config/rx/rx.md | 82
-rw-r--r--  gcc/config/rx/rx.opt | 34
-rw-r--r--  gcc/config/rx/t-rx | 6
-rw-r--r--  gcc/config/score/score.h | 3
-rw-r--r--  gcc/config/stormy16/stormy16.h | 2
-rw-r--r--  gcc/config/vax/linux.h | 12
51 files changed, 4036 insertions, 542 deletions
diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h
index 2b5165c0754..81c12aa14fc 100644
--- a/gcc/config/alpha/osf.h
+++ b/gcc/config/alpha/osf.h
@@ -167,10 +167,6 @@ __enable_execute_stack (void *addr) \
#define LD_INIT_SWITCH "-init"
#define LD_FINI_SWITCH "-fini"
-/* The linker needs a space after "-o". This allows -oldstyle_liblookup to
- be passed to ld. */
-#define SWITCHES_NEED_SPACES "o"
-
/* Select a format to encode pointers in exception handling data. CODE
is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
true if the symbol may be affected by dynamic relocations.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 25e433cd3a4..4c7fcb65854 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -524,14 +524,11 @@ enum processor_type arm_tune = arm_none;
/* The default processor used if not overridden by commandline. */
static enum processor_type arm_default_cpu = arm_none;
-/* Which floating point model to use. */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. */
-enum fputype arm_fpu_arch;
-
/* Which floating point hardware to schedule for. */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware. */
enum float_abi_type arm_float_abi;
@@ -809,46 +806,21 @@ static struct arm_cpu_select arm_select[] =
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
-struct fpu_desc
-{
- const char * name;
- enum fputype fpu;
-};
-
-
/* Available values for -mfpu=. */
-static const struct fpu_desc all_fpus[] =
-{
- {"fpa", FPUTYPE_FPA},
- {"fpe2", FPUTYPE_FPA_EMU2},
- {"fpe3", FPUTYPE_FPA_EMU2},
- {"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP},
- {"vfp3", FPUTYPE_VFP3},
- {"vfpv3", FPUTYPE_VFP3},
- {"vfpv3-d16", FPUTYPE_VFP3D16},
- {"neon", FPUTYPE_NEON},
- {"neon-fp16", FPUTYPE_NEON_FP16}
-};
-
-
-/* Floating point models used by the different hardware.
- See fputype in arm.h. */
-
-static const enum arm_fp_model fp_model_for_fpu[] =
-{
- /* No FP hardware. */
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
- ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, 0, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, 0, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, 0, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, 0, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
@@ -1615,7 +1587,6 @@ arm_override_options (void)
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
error ("iwmmxt abi requires an iwmmxt capable cpu");
- arm_fp_model = ARM_FP_MODEL_UNKNOWN;
if (target_fpu_name == NULL && target_fpe_name != NULL)
{
if (streq (target_fpe_name, "2"))
@@ -1626,46 +1597,52 @@ arm_override_options (void)
error ("invalid floating point emulation option: -mfpe=%s",
target_fpe_name);
}
- if (target_fpu_name != NULL)
- {
- /* The user specified a FPU. */
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
- {
- if (streq (all_fpus[i].name, target_fpu_name))
- {
- arm_fpu_arch = all_fpus[i].fpu;
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- break;
- }
- }
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
- error ("invalid floating point option: -mfpu=%s", target_fpu_name);
- }
- else
+
+ if (target_fpu_name == NULL)
{
#ifdef FPUTYPE_DEFAULT
- /* Use the default if it is specified for this platform. */
- arm_fpu_arch = FPUTYPE_DEFAULT;
- arm_fpu_tune = FPUTYPE_DEFAULT;
+ target_fpu_name = FPUTYPE_DEFAULT;
#else
- /* Pick one based on CPU type. */
- /* ??? Some targets assume FPA is the default.
- if ((insn_flags & FL_VFP) != 0)
- arm_fpu_arch = FPUTYPE_VFP;
- else
- */
if (arm_arch_cirrus)
- arm_fpu_arch = FPUTYPE_MAVERICK;
+ target_fpu_name = "maverick";
else
- arm_fpu_arch = FPUTYPE_FPA_EMU2;
+ target_fpu_name = "fpe2";
#endif
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
- arm_fpu_tune = FPUTYPE_FPA;
+ }
+
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+ {
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+ if (!arm_fpu_desc)
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
else
- arm_fpu_tune = arm_fpu_arch;
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable();
}
if (target_float_abi_name != NULL)
@@ -1687,7 +1664,7 @@ arm_override_options (void)
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
if (TARGET_AAPCS_BASED
- && (arm_fp_model == ARM_FP_MODEL_FPA))
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
error ("FPA is unsupported in the AAPCS");
if (TARGET_AAPCS_BASED)
@@ -1715,7 +1692,7 @@ arm_override_options (void)
/* If soft-float is specified then don't use FPU. */
if (TARGET_SOFT_FLOAT)
- arm_fpu_arch = FPUTYPE_NONE;
+ arm_fpu_attr = FPU_NONE;
if (TARGET_AAPCS_BASED)
{
@@ -1742,8 +1719,7 @@ arm_override_options (void)
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
- || arm_fpu_tune == FPUTYPE_FPA_EMU2
- || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+ || (TARGET_FPA && arm_fpu_desc->rev))
&& (tune_flags & FL_MODE32) == 0)
flag_schedule_insns = flag_schedule_insns_after_reload = 0;
@@ -13305,7 +13281,7 @@ arm_output_epilogue (rtx sibling)
/* This variable is for the Virtual Frame Pointer, not VFP regs. */
int vfp_offset = offsets->frame;
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -13528,7 +13504,7 @@ arm_output_epilogue (rtx sibling)
SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
}
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -14254,7 +14230,7 @@ arm_save_coproc_regs(void)
/* Save any floating point call-saved registers used by this
function. */
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ if (TARGET_FPA_EMU2)
{
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
@@ -19736,45 +19712,8 @@ arm_file_start (void)
}
else
{
- int set_float_abi_attributes = 0;
- switch (arm_fpu_arch)
- {
- case FPUTYPE_FPA:
- fpu_name = "fpa";
- break;
- case FPUTYPE_FPA_EMU2:
- fpu_name = "fpe2";
- break;
- case FPUTYPE_FPA_EMU3:
- fpu_name = "fpe3";
- break;
- case FPUTYPE_MAVERICK:
- fpu_name = "maverick";
- break;
- case FPUTYPE_VFP:
- fpu_name = "vfp";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3D16:
- fpu_name = "vfpv3-d16";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_VFP3:
- fpu_name = "vfpv3";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON:
- fpu_name = "neon";
- set_float_abi_attributes = 1;
- break;
- case FPUTYPE_NEON_FP16:
- fpu_name = "neon-fp16";
- set_float_abi_attributes = 1;
- break;
- default:
- abort();
- }
- if (set_float_abi_attributes)
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
{
if (TARGET_HARD_FLOAT)
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
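
Taken together, these arm.c hunks replace three parallel globals (arm_fp_model, arm_fpu_arch, arm_fpu_tune) with a single table lookup: the -mfpu= string is matched by name against all_fpus[], and every later decision reads fields of the resulting arm_fpu_desc. Below is a minimal standalone sketch of that lookup; the struct mirrors the one added in arm.h, while find_fpu, the abbreviated table, and main are simplified stand-ins for the GCC internals (streq, error, the full all_fpus).

/* Minimal sketch of the table-driven -mfpu= lookup introduced above.
   Types and helpers are simplified stand-ins for GCC internals.  */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

enum arm_fp_model { ARM_FP_MODEL_FPA, ARM_FP_MODEL_MAVERICK, ARM_FP_MODEL_VFP };
enum vfp_reg_type { VFP_REG_D16, VFP_REG_D32, VFP_REG_SINGLE };

struct arm_fpu_desc {
  const char *name;
  enum arm_fp_model model;
  int rev;
  enum vfp_reg_type regs;
  bool neon, fp16;
};

static const struct arm_fpu_desc all_fpus[] = {
  { "vfp",   ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false },
  { "vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false },
  { "neon",  ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true,  false },
};

/* Resolve an -mfpu= string to its descriptor, as arm_override_options
   now does; returns NULL for an unknown name instead of calling error().  */
static const struct arm_fpu_desc *find_fpu (const char *name)
{
  for (size_t i = 0; i < sizeof all_fpus / sizeof all_fpus[0]; i++)
    if (strcmp (all_fpus[i].name, name) == 0)
      return &all_fpus[i];
  return NULL;
}

int main (void)
{
  const struct arm_fpu_desc *d = find_fpu ("neon");
  if (d)
    printf ("%s: VFP rev %d, %s register file\n", d->name, d->rev,
            d->regs == VFP_REG_D32 ? "D32" : "D16");
  return 0;
}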
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 9272ca51cba..2dfd22df45c 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -190,9 +190,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT)
/* Use hardware floating point calling convention. */
#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD)
-#define TARGET_FPA (arm_fp_model == ARM_FP_MODEL_FPA)
-#define TARGET_MAVERICK (arm_fp_model == ARM_FP_MODEL_MAVERICK)
-#define TARGET_VFP (arm_fp_model == ARM_FP_MODEL_VFP)
+#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA)
+#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK)
+#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
#define TARGET_IWMMXT (arm_arch_iwmmxt)
#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT)
#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT)
@@ -216,6 +216,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2)
/* Thumb-1 only. */
#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm)
+/* FPA emulator without LFM. */
+#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2)
/* The following two macros concern the ability to execute coprocessor
instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently
@@ -223,27 +225,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
to be more careful with TARGET_NEON as noted below. */
/* FPU has the full VFPv3/NEON register file of 32 D registers. */
-#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_VFP3 \
- || arm_fpu_arch == FPUTYPE_NEON \
- || arm_fpu_arch == FPUTYPE_NEON_FP16))
+#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32)
/* FPU supports VFPv3 instructions. */
-#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_VFP3D16 \
- || TARGET_VFPD32))
+#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3)
/* FPU supports NEON/VFP half-precision floating-point. */
-#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16)
+#define TARGET_NEON_FP16 \
+ (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16)
/* FPU supports Neon instructions. The setting of this macro gets
revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT
and TARGET_HARD_FLOAT to ensure that NEON instructions are
available. */
#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \
- && arm_fp_model == ARM_FP_MODEL_VFP \
- && (arm_fpu_arch == FPUTYPE_NEON \
- || arm_fpu_arch == FPUTYPE_NEON_FP16))
+ && TARGET_VFP && arm_fpu_desc->neon)
/* "DSP" multiply instructions, eg. SMULxy. */
#define TARGET_DSP_MULTIPLY \
@@ -300,42 +296,25 @@ enum arm_fp_model
ARM_FP_MODEL_VFP
};
-extern enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available. Also update
- fp_model_for_fpu in arm.c when adding entries to this list. */
-enum fputype
+enum vfp_reg_type
{
- /* No FP hardware. */
- FPUTYPE_NONE,
- /* Full FPA support. */
- FPUTYPE_FPA,
- /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */
- FPUTYPE_FPA_EMU2,
- /* Emulated FPA hardware, Issue 3 emulator. */
- FPUTYPE_FPA_EMU3,
- /* Cirrus Maverick floating point co-processor. */
- FPUTYPE_MAVERICK,
- /* VFP. */
- FPUTYPE_VFP,
- /* VFPv3-D16. */
- FPUTYPE_VFP3D16,
- /* VFPv3. */
- FPUTYPE_VFP3,
- /* Neon. */
- FPUTYPE_NEON,
- /* Neon with half-precision float extensions. */
- FPUTYPE_NEON_FP16
+ VFP_REG_D16,
+ VFP_REG_D32,
+ VFP_REG_SINGLE
};
-/* Recast the floating point class to be the floating point attribute. */
-#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune)
-
-/* What type of floating point to tune for */
-extern enum fputype arm_fpu_tune;
-
-/* What type of floating point instructions are available */
-extern enum fputype arm_fpu_arch;
+extern const struct arm_fpu_desc
+{
+ const char *name;
+ enum arm_fp_model model;
+ int rev;
+ enum vfp_reg_type regs;
+ int neon;
+ int fp16;
+} *arm_fpu_desc;
+
+/* Which floating point hardware to schedule for. */
+extern int arm_fpu_attr;
enum float_abi_type
{
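
The macro rewrites above follow directly from the new descriptor: each TARGET_* test that used to enumerate fputype values becomes a one-field predicate. Expressed as plain C functions, a sketch of the macro logic only (TARGET_NEON's extra TARGET_32BIT and TARGET_HARD_FLOAT guards are noted in a comment, not modeled):

/* Sketch of the reworked arm.h feature macros as predicates over
   arm_fpu_desc.  Field names follow the struct added in arm.h;
   the enums are simplified stand-ins.  */
#include <stdbool.h>

enum arm_fp_model { ARM_FP_MODEL_FPA, ARM_FP_MODEL_MAVERICK, ARM_FP_MODEL_VFP };
enum vfp_reg_type { VFP_REG_D16, VFP_REG_D32, VFP_REG_SINGLE };
struct arm_fpu_desc {
  enum arm_fp_model model;
  int rev;
  enum vfp_reg_type regs;
  bool neon, fp16;
};

static bool target_vfp (const struct arm_fpu_desc *d)
{ return d->model == ARM_FP_MODEL_VFP; }

static bool target_vfpd32 (const struct arm_fpu_desc *d)
{ return target_vfp (d) && d->regs == VFP_REG_D32; }

static bool target_vfp3 (const struct arm_fpu_desc *d)
{ return target_vfp (d) && d->rev >= 3; }

/* The real TARGET_NEON additionally requires TARGET_32BIT
   and TARGET_HARD_FLOAT.  */
static bool target_neon (const struct arm_fpu_desc *d)
{ return target_vfp (d) && d->neon; }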
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b8bf700242b..52edcbaa17b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -160,7 +160,7 @@
; Floating Point Unit. If we only have floating point emulation, then there
; is no point in scheduling the floating point insns. (Well, for best
; performance we should try and group them together).
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16"
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
(const (symbol_ref "arm_fpu_attr")))
; LENGTH of an instruction (in bytes)
@@ -6770,6 +6770,7 @@
(const_int 6)
(const_int 8))))]
)
+
(define_insn "*movsi_cbranchsi4"
[(set (pc)
(if_then_else
@@ -6833,6 +6834,45 @@
(const_int 10)))))]
)
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 1) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(parallel
+ [(set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 0) (match_dup 1))])]
+ ""
+)
+
+;; Sigh! This variant shouldn't be needed, but combine often fails to
+;; merge cases like this because the op1 is a hard register in
+;; CLASS_LIKELY_SPILLED_P.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(parallel
+ [(set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 0) (match_dup 1))])]
+ ""
+)
+
(define_insn "*negated_cbranchsi4"
[(set (pc)
(if_then_else
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index faaaf7bca39..ccfc7426077 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -61,7 +61,7 @@ typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16)));
typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
-typedef __builtin_neon_sf float32_t;
+typedef float float32_t;
typedef __builtin_neon_poly8 poly8_t;
typedef __builtin_neon_poly16 poly16_t;
@@ -5085,7 +5085,7 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
{
- return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c);
+ return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5151,7 +5151,7 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
{
- return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c);
+ return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -5283,7 +5283,7 @@ vdup_n_s32 (int32_t __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5349,7 +5349,7 @@ vdupq_n_s32 (int32_t __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -5415,7 +5415,7 @@ vmov_n_s32 (int32_t __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmov_n_f32 (float32_t __a)
{
- return (float32x2_t)__builtin_neon_vdup_nv2sf (__a);
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -5481,7 +5481,7 @@ vmovq_n_s32 (int32_t __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmovq_n_f32 (float32_t __a)
{
- return (float32x4_t)__builtin_neon_vdup_nv4sf (__a);
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -6591,7 +6591,7 @@ vmul_n_s32 (int32x2_t __a, int32_t __b)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t __a, float32_t __b)
{
- return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3);
+ return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6621,7 +6621,7 @@ vmulq_n_s32 (int32x4_t __a, int32_t __b)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t __a, float32_t __b)
{
- return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3);
+ return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -6735,7 +6735,7 @@ vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
- return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3);
+ return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6765,7 +6765,7 @@ vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
- return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3);
+ return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -6831,7 +6831,7 @@ vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
{
- return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3);
+ return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
@@ -6861,7 +6861,7 @@ vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
{
- return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3);
+ return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
@@ -7851,7 +7851,7 @@ vld1_s64 (const int64_t * __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * __a)
{
- return (float32x2_t)__builtin_neon_vld1v2sf (__a);
+ return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -7917,7 +7917,7 @@ vld1q_s64 (const int64_t * __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * __a)
{
- return (float32x4_t)__builtin_neon_vld1v4sf (__a);
+ return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -7977,7 +7977,7 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
{
- return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c);
+ return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -8043,7 +8043,7 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
{
- return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c);
+ return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -8109,7 +8109,7 @@ vld1_dup_s32 (const int32_t * __a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * __a)
{
- return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a);
+ return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -8175,7 +8175,7 @@ vld1q_dup_s32 (const int32_t * __a)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * __a)
{
- return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a);
+ return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -8247,7 +8247,7 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t * __a, float32x2_t __b)
{
- __builtin_neon_vst1v2sf (__a, __b);
+ __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8313,7 +8313,7 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t * __a, float32x4_t __b)
{
- __builtin_neon_vst1v4sf (__a, __b);
+ __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8373,7 +8373,7 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
{
- __builtin_neon_vst1_lanev2sf (__a, __b, __c);
+ __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8439,7 +8439,7 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
{
- __builtin_neon_vst1_lanev4sf (__a, __b, __c);
+ __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8512,7 +8512,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_f32 (const float32_t * __a)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2v2sf (__a);
+ __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8600,7 +8600,7 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vld2q_f32 (const float32_t * __a)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld2v4sf (__a);
+ __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8676,7 +8676,7 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -8748,7 +8748,7 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -8807,7 +8807,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vld2_dup_f32 (const float32_t * __a)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
- __rv.__o = __builtin_neon_vld2_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -8892,7 +8892,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f32 (float32_t * __a, float32x2x2_t __b)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v2sf (__a, __bu.__o);
+ __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -8969,7 +8969,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f32 (float32_t * __a, float32x4x2_t __b)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v4sf (__a, __bu.__o);
+ __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9032,7 +9032,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
{
union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9088,7 +9088,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
{
union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9140,7 +9140,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_f32 (const float32_t * __a)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3v2sf (__a);
+ __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9228,7 +9228,7 @@ __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_f32 (const float32_t * __a)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
- __rv.__o = __builtin_neon_vld3v4sf (__a);
+ __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9304,7 +9304,7 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -9376,7 +9376,7 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
- __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -9435,7 +9435,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
- __rv.__o = __builtin_neon_vld3_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9520,7 +9520,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f32 (float32_t * __a, float32x2x3_t __b)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v2sf (__a, __bu.__o);
+ __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9597,7 +9597,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f32 (float32_t * __a, float32x4x3_t __b)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v4sf (__a, __bu.__o);
+ __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9660,7 +9660,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
{
union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9716,7 +9716,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
{
union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -9768,7 +9768,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_f32 (const float32_t * __a)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4v2sf (__a);
+ __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9856,7 +9856,7 @@ __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_f32 (const float32_t * __a)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
- __rv.__o = __builtin_neon_vld4v4sf (__a);
+ __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -9932,7 +9932,7 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -10004,7 +10004,7 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c);
+ __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
return __rv.__i;
}
@@ -10063,7 +10063,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
- __rv.__o = __builtin_neon_vld4_dupv2sf (__a);
+ __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a);
return __rv.__i;
}
@@ -10148,7 +10148,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t __b)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst4v2sf (__a, __bu.__o);
+ __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10225,7 +10225,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t __b)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
- __builtin_neon_vst4v4sf (__a, __bu.__o);
+ __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10288,7 +10288,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
{
union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c);
+ __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
@@ -10344,7 +10344,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
{
union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
- __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c);
+ __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
}
__extension__ static __inline void __attribute__ ((__always_inline__))
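
All of these arm_neon.h changes are one mechanical consequence of retyping float32_t from __builtin_neon_sf to plain float: the builtins still traffic in __builtin_neon_sf, so each generated intrinsic now converts explicitly at the call boundary. A sketch of the pattern, using mock types since __builtin_neon_sf exists only inside the ARM back end:

/* Why the casts: float32_t used to *be* __builtin_neon_sf, so the
   builtins accepted it directly.  Now that float32_t is plain float,
   every call site converts explicitly.  mock_neon_sf and
   mock_builtin_vdup stand in for the compiler-internal type and
   builtin, which are not available in ordinary code.  */
typedef float float32_t;          /* the new user-visible type */
typedef float mock_neon_sf;       /* stand-in for __builtin_neon_sf */
typedef struct { float v[2]; } float32x2_t;

static float32x2_t mock_builtin_vdup (mock_neon_sf a)
{
  float32x2_t r = { { a, a } };
  return r;
}

/* Shape of the regenerated intrinsic: cast at the boundary.  */
static inline float32x2_t vdup_n_f32_sketch (float32_t a)
{
  return mock_builtin_vdup ((mock_neon_sf) a);
}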
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index 8d3afbf55fd..ba206022b75 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -30,7 +30,7 @@
/* Section 4.1 of the AAPCS requires the use of VFP format. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
/* TARGET_BIG_ENDIAN_DEFAULT is set in
config.gcc for big endian configurations. */
diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md
index fcd92b002d7..515de43d28b 100644
--- a/gcc/config/arm/fpa.md
+++ b/gcc/config/arm/fpa.md
@@ -599,10 +599,10 @@
{
default:
case 0: return \"mvf%?e\\t%0, %1\";
- case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 1: if (TARGET_FPA_EMU2)
return \"ldf%?e\\t%0, %1\";
return \"lfm%?\\t%0, 1, %1\";
- case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
+ case 2: if (TARGET_FPA_EMU2)
return \"stf%?e\\t%1, %0\";
return \"sfm%?\\t%1, 1, %0\";
}
diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h
index 07455ee87fd..9fdca414e8e 100644
--- a/gcc/config/arm/linux-elf.h
+++ b/gcc/config/arm/linux-elf.h
@@ -98,7 +98,7 @@
/* NWFPE always understands FPA instructions. */
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3
+#define FPUTYPE_DEFAULT "fpe3"
/* Call the function profiler with a given profile label. */
#undef ARM_FUNCTION_PROFILER
diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml
index 9c8e2a89b86..112c8be6e3b 100644
--- a/gcc/config/arm/neon-gen.ml
+++ b/gcc/config/arm/neon-gen.ml
@@ -122,6 +122,7 @@ let rec signed_ctype = function
| T_uint16 | T_int16 -> T_intHI
| T_uint32 | T_int32 -> T_intSI
| T_uint64 | T_int64 -> T_intDI
+ | T_float32 -> T_floatSF
| T_poly8 -> T_intQI
| T_poly16 -> T_intHI
| T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
@@ -320,7 +321,7 @@ let deftypes () =
typeinfo;
Format.print_newline ();
(* Extra types not in <stdint.h>. *)
- Format.printf "typedef __builtin_neon_sf float32_t;\n";
+ Format.printf "typedef float float32_t;\n";
Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 10393b33ebc..114097d22a7 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -50,7 +50,7 @@ type vectype = T_int8x8 | T_int8x16
| T_ptrto of vectype | T_const of vectype
| T_void | T_intQI
| T_intHI | T_intSI
- | T_intDI
+ | T_intDI | T_floatSF
(* The meanings of the following are:
TImode : "Tetra", two registers (four words).
@@ -1693,6 +1693,7 @@ let string_of_vectype vt =
| T_intHI -> "__builtin_neon_hi"
| T_intSI -> "__builtin_neon_si"
| T_intDI -> "__builtin_neon_di"
+ | T_floatSF -> "__builtin_neon_sf"
| T_arrayof (num, base) ->
let basename = name (fun x -> x) base in
affix (Printf.sprintf "%sx%d" basename num)
diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h
index 4c06fa1cb3b..9cf186b338d 100644
--- a/gcc/config/arm/netbsd-elf.h
+++ b/gcc/config/arm/netbsd-elf.h
@@ -153,5 +153,5 @@ do \
while (0)
#undef FPUTYPE_DEFAULT
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index 8879fedb7d7..aa7e197bc5d 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -97,7 +97,7 @@ along with GCC; see the file COPYING3. If not see
/* There is no default multilib. */
#undef MULTILIB_DEFAULTS
-#define FPUTYPE_DEFAULT FPUTYPE_VFP
+#define FPUTYPE_DEFAULT "vfp"
#undef FUNCTION_PROFILER
#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 0927e3928c1..782ad11627b 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -406,8 +406,6 @@ extern int avr_reg_order[];
#define HAVE_POST_INCREMENT 1
#define HAVE_PRE_DECREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#define REG_OK_FOR_BASE_NOSTRICT_P(X) \
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 03a279036f3..365680ee9fa 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -911,9 +911,6 @@ typedef struct {
/* Addressing Modes */
-/* Recognize any constant value that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X))
-
/* Nonzero if the constant value X is a legitimate general operand.
symbol_ref are not legitimate and will be put into constant pool.
See force_const_mem().
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 0fea7d77b39..3c426b74ae5 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -950,8 +950,6 @@ struct cum_args {int regs;};
#define HAVE_POST_INCREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Must be a compile-time constant, so we go with the highest value
among all CRIS variants. */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h
index 20e157173d8..5e6237895b5 100644
--- a/gcc/config/fr30/fr30.h
+++ b/gcc/config/fr30/fr30.h
@@ -741,16 +741,6 @@ enum reg_class
/*}}}*/
/*{{{ Addressing Modes. */
-/* A C expression that is 1 if the RTX X is a constant which is a valid
- address. On most machines, this can be defined as `CONSTANT_P (X)', but a
- few machines are more restrictive in which constant addresses are supported.
-
- `CONSTANT_P' accepts integer-values expressions whose values are not
- explicitly known, such as `symbol_ref', `label_ref', and `high' expressions
- and `const' arithmetic expressions, in addition to `const_int' and
- `const_double' expressions. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* A number, the maximum number of registers that can appear in a valid memory
address. Note that it is up to you to specify a value equal to the maximum
number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */
diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h
index d48aa1ef17d..d5a7a4a6670 100644
--- a/gcc/config/frv/frv.h
+++ b/gcc/config/frv/frv.h
@@ -1927,16 +1927,6 @@ __asm__("\n" \
/* Addressing Modes. */
-/* A C expression that is 1 if the RTX X is a constant which is a valid
- address. On most machines, this can be defined as `CONSTANT_P (X)', but a
- few machines are more restrictive in which constant addresses are supported.
-
- `CONSTANT_P' accepts integer-values expressions whose values are not
- explicitly known, such as `symbol_ref', `label_ref', and `high' expressions
- and `const' arithmetic expressions, in addition to `const_int' and
- `const_double' expressions. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* A number, the maximum number of registers that can appear in a valid memory
address. Note that it is up to you to specify a value equal to the maximum
number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 49acfa780e4..21f0e3184ef 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -46,9 +46,11 @@
/* Extended Features */
/* %ecx */
+#define bit_FMA4 (1 << 16)
#define bit_LAHF_LM (1 << 0)
+#define bit_LWP (1 << 15)
#define bit_SSE4a (1 << 6)
-#define bit_FMA4 (1 << 16)
+#define bit_XOP (1 << 11)
/* %edx */
#define bit_LM (1 << 29)
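
bit_XOP and bit_LWP sit in %ecx of CPUID leaf 0x80000001, alongside the existing bit_FMA4 and bit_SSE4a. A small runtime probe using GCC's <cpuid.h>; the mask values below are copied from the definitions above:

/* Probe for XOP/LWP support at run time.  __get_cpuid returns 0 if
   the requested (extended) leaf is unsupported.  */
#include <stdio.h>
#include <cpuid.h>

int main (void)
{
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
    return 1;
  printf ("XOP: %s\n", (ecx & (1 << 11)) ? "yes" : "no");  /* bit_XOP */
  printf ("LWP: %s\n", (ecx & (1 << 15)) ? "yes" : "no");  /* bit_LWP */
  return 0;
}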
diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h
index 42782ade0ed..2bd411a0f05 100644
--- a/gcc/config/i386/fma4intrin.h
+++ b/gcc/config/i386/fma4intrin.h
@@ -35,15 +35,6 @@
/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
#include <ammintrin.h>
-/* Internal data types for implementing the intrinsics. */
-typedef float __v8sf __attribute__ ((__vector_size__ (32)));
-typedef double __v4df __attribute__ ((__vector_size__ (32)));
-
-typedef float __m256 __attribute__ ((__vector_size__ (32),
- __may_alias__));
-typedef double __m256d __attribute__ ((__vector_size__ (32),
- __may_alias__));
-
/* 128b Floating point multiply/add type instructions. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 12a3f1759a8..5a5311fba0f 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -232,6 +232,10 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_FMA4)
def_or_undef (parse_in, "__FMA4__");
+ if (isa_flag & OPTION_MASK_ISA_XOP)
+ def_or_undef (parse_in, "__XOP__");
+ if (isa_flag & OPTION_MASK_ISA_LWP)
+ def_or_undef (parse_in, "__LWP__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
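
With -mxop or -mlwp in effect, these predefines give user code a stable way to select ISA-specific paths at compile time, e.g.:

/* Minimal sketch of compile-time dispatch on the new macros.  */
#if defined (__XOP__)
static const char *const isa_note = "built with -mxop: XOP intrinsics available";
#elif defined (__LWP__)
static const char *const isa_note = "built with -mlwp: LWP builtins available";
#else
static const char *const isa_note = "neither __XOP__ nor __LWP__ defined";
#endif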
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c29a7848ae1..2031dfb6e98 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1964,6 +1964,10 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_FMA4_SET \
(OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
| OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_XOP_SET \
+ (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
+#define OPTION_MASK_ISA_LWP_SET \
+ OPTION_MASK_ISA_LWP
/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
@@ -2015,7 +2019,10 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_SSE4A_UNSET \
(OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
-#define OPTION_MASK_ISA_FMA4_UNSET OPTION_MASK_ISA_FMA4
+#define OPTION_MASK_ISA_FMA4_UNSET \
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
+#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
@@ -2263,6 +2270,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
}
return true;
+ case OPT_mxop:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
+ }
+ return true;
+
+ case OPT_mlwp:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
+ }
+ return true;
+
case OPT_mabm:
if (value)
{
@@ -2391,6 +2424,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{
{ "-m64", OPTION_MASK_ISA_64BIT },
{ "-mfma4", OPTION_MASK_ISA_FMA4 },
+ { "-mxop", OPTION_MASK_ISA_XOP },
+ { "-mlwp", OPTION_MASK_ISA_LWP },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -2621,7 +2656,9 @@ override_options (bool main_args_p)
PTA_AVX = 1 << 18,
PTA_FMA = 1 << 19,
PTA_MOVBE = 1 << 20,
- PTA_FMA4 = 1 << 21
+ PTA_FMA4 = 1 << 21,
+ PTA_XOP = 1 << 22,
+ PTA_LWP = 1 << 23
};
static struct pta
@@ -2967,6 +3004,12 @@ override_options (bool main_args_p)
if (processor_alias_table[i].flags & PTA_FMA4
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
+ if (processor_alias_table[i].flags & PTA_XOP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP;
+ if (processor_alias_table[i].flags & PTA_LWP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP;
if (processor_alias_table[i].flags & PTA_ABM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
ix86_isa_flags |= OPTION_MASK_ISA_ABM;
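
The SET/UNSET masks above encode a dependency closure: OPTION_MASK_ISA_XOP_SET folds in the whole FMA4 chain (and, transitively, SSE4A and AVX), while OPTION_MASK_ISA_FMA4_UNSET folds in XOP so that -mno-fma4 cannot leave XOP dangling. A self-contained sketch with illustrative mask values, not GCC's real ones:

/* Sketch of the option-mask dependency logic: enabling an ISA ORs in
   its closure of prerequisites; disabling one clears everything that
   depends on it.  */
#include <stdio.h>

#define MASK_SSE4A (1 << 0)
#define MASK_AVX   (1 << 1)
#define MASK_FMA4  (1 << 2)
#define MASK_XOP   (1 << 3)

#define MASK_FMA4_SET   (MASK_FMA4 | MASK_SSE4A | MASK_AVX)
#define MASK_XOP_SET    (MASK_XOP | MASK_FMA4_SET)
#define MASK_XOP_UNSET  MASK_XOP
#define MASK_FMA4_UNSET (MASK_FMA4 | MASK_XOP_UNSET)

int main (void)
{
  unsigned flags = 0;
  flags |= MASK_XOP_SET;        /* -mxop: pulls in FMA4, SSE4A, AVX */
  flags &= ~MASK_FMA4_UNSET;    /* -mno-fma4: also drops XOP */
  printf ("SSE4A=%u AVX=%u FMA4=%u XOP=%u\n",
          !!(flags & MASK_SSE4A), !!(flags & MASK_AVX),
          !!(flags & MASK_FMA4), !!(flags & MASK_XOP));
  return 0;
}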
@@ -3649,6 +3692,8 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
IX86_ATTR_ISA ("sse4a", OPT_msse4a),
IX86_ATTR_ISA ("ssse3", OPT_mssse3),
IX86_ATTR_ISA ("fma4", OPT_mfma4),
+ IX86_ATTR_ISA ("xop", OPT_mxop),
+ IX86_ATTR_ISA ("lwp", OPT_mlwp),
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
@@ -11290,6 +11335,7 @@ get_some_local_dynamic_name (void)
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
+ Y -- print condition for XOP pcom* instruction.
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
*/
@@ -11707,6 +11753,61 @@ print_operand (FILE *file, rtx x, int code)
return;
}
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ fputs ("neq", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case GE:
+ case GEU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
+ break;
+ case GT:
+ case GTU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ case UNEQ:
+ fputs ("ueq", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case UNLE:
+ fputs ("ule", file);
+ break;
+ case UNLT:
+ fputs ("ult", file);
+ break;
+ case LTGT:
+ fputs ("une", file);
+ break;
+ default:
+	  output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
+ return;
+ }
+ return;
+
case ';':
#if TARGET_MACHO
fputs (" ; ", file);
@@ -15916,6 +16017,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
+ else if (TARGET_XOP)
+ {
+ rtx pcmov = gen_rtx_SET (mode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false));
+ emit_insn (pcmov);
+ }
else
{
op_true = force_reg (mode, op_true);
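
The XOP branch works because pcmov is a pure bitwise select: given an all-ones/all-zeros comparison mask, it implements the vector conditional move in one instruction, where the fallback path needs the AND/ANDN/OR sequence. A scalar model of what one lane computes:

/* Scalar model of a pcmov lane: every result bit comes from op_true
   where the mask bit is 1, from op_false where it is 0.  The
   AND/ANDN/OR fallback computes the same expression in three
   instructions.  */
static unsigned long long
pcmov_model (unsigned long long mask, unsigned long long op_true,
             unsigned long long op_false)
{
  return (op_true & mask) | (op_false & ~mask);
}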
@@ -16038,6 +16147,9 @@ ix86_expand_int_vcond (rtx operands[])
cop0 = operands[4];
cop1 = operands[5];
+ /* XOP supports all of the comparisons on all vector int types. */
+ if (!TARGET_XOP)
+ {
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
@@ -16148,6 +16260,7 @@ ix86_expand_int_vcond (rtx operands[])
cop0 = x;
cop1 = CONST0_RTX (mode);
}
+ }
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
operands[1+negate], operands[2-negate]);
@@ -20806,7 +20919,7 @@ enum ix86_builtins
IX86_BUILTIN_CVTUDQ2PS,
- /* FMA4 instructions. */
+ /* FMA4 and XOP instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
IX86_BUILTIN_VFMADDPS,
@@ -20839,6 +20952,164 @@ enum ix86_builtins
IX86_BUILTIN_VFNMADDPD256,
IX86_BUILTIN_VFNMSUBPS256,
IX86_BUILTIN_VFNMSUBPD256,
+
+ IX86_BUILTIN_VPCMOV,
+ IX86_BUILTIN_VPCMOV_V2DI,
+ IX86_BUILTIN_VPCMOV_V4SI,
+ IX86_BUILTIN_VPCMOV_V8HI,
+ IX86_BUILTIN_VPCMOV_V16QI,
+ IX86_BUILTIN_VPCMOV_V4SF,
+ IX86_BUILTIN_VPCMOV_V2DF,
+ IX86_BUILTIN_VPCMOV256,
+ IX86_BUILTIN_VPCMOV_V4DI256,
+ IX86_BUILTIN_VPCMOV_V8SI256,
+ IX86_BUILTIN_VPCMOV_V16HI256,
+ IX86_BUILTIN_VPCMOV_V32QI256,
+ IX86_BUILTIN_VPCMOV_V8SF256,
+ IX86_BUILTIN_VPCMOV_V4DF256,
+
+ IX86_BUILTIN_VPPERM,
+
+ IX86_BUILTIN_VPMACSSWW,
+ IX86_BUILTIN_VPMACSWW,
+ IX86_BUILTIN_VPMACSSWD,
+ IX86_BUILTIN_VPMACSWD,
+ IX86_BUILTIN_VPMACSSDD,
+ IX86_BUILTIN_VPMACSDD,
+ IX86_BUILTIN_VPMACSSDQL,
+ IX86_BUILTIN_VPMACSSDQH,
+ IX86_BUILTIN_VPMACSDQL,
+ IX86_BUILTIN_VPMACSDQH,
+ IX86_BUILTIN_VPMADCSSWD,
+ IX86_BUILTIN_VPMADCSWD,
+
+ IX86_BUILTIN_VPHADDBW,
+ IX86_BUILTIN_VPHADDBD,
+ IX86_BUILTIN_VPHADDBQ,
+ IX86_BUILTIN_VPHADDWD,
+ IX86_BUILTIN_VPHADDWQ,
+ IX86_BUILTIN_VPHADDDQ,
+ IX86_BUILTIN_VPHADDUBW,
+ IX86_BUILTIN_VPHADDUBD,
+ IX86_BUILTIN_VPHADDUBQ,
+ IX86_BUILTIN_VPHADDUWD,
+ IX86_BUILTIN_VPHADDUWQ,
+ IX86_BUILTIN_VPHADDUDQ,
+ IX86_BUILTIN_VPHSUBBW,
+ IX86_BUILTIN_VPHSUBWD,
+ IX86_BUILTIN_VPHSUBDQ,
+
+ IX86_BUILTIN_VPROTB,
+ IX86_BUILTIN_VPROTW,
+ IX86_BUILTIN_VPROTD,
+ IX86_BUILTIN_VPROTQ,
+ IX86_BUILTIN_VPROTB_IMM,
+ IX86_BUILTIN_VPROTW_IMM,
+ IX86_BUILTIN_VPROTD_IMM,
+ IX86_BUILTIN_VPROTQ_IMM,
+
+ IX86_BUILTIN_VPSHLB,
+ IX86_BUILTIN_VPSHLW,
+ IX86_BUILTIN_VPSHLD,
+ IX86_BUILTIN_VPSHLQ,
+ IX86_BUILTIN_VPSHAB,
+ IX86_BUILTIN_VPSHAW,
+ IX86_BUILTIN_VPSHAD,
+ IX86_BUILTIN_VPSHAQ,
+
+ IX86_BUILTIN_VFRCZSS,
+ IX86_BUILTIN_VFRCZSD,
+ IX86_BUILTIN_VFRCZPS,
+ IX86_BUILTIN_VFRCZPD,
+ IX86_BUILTIN_VFRCZPS256,
+ IX86_BUILTIN_VFRCZPD256,
+
+ IX86_BUILTIN_VPCOMEQUB,
+ IX86_BUILTIN_VPCOMNEUB,
+ IX86_BUILTIN_VPCOMLTUB,
+ IX86_BUILTIN_VPCOMLEUB,
+ IX86_BUILTIN_VPCOMGTUB,
+ IX86_BUILTIN_VPCOMGEUB,
+ IX86_BUILTIN_VPCOMFALSEUB,
+ IX86_BUILTIN_VPCOMTRUEUB,
+
+ IX86_BUILTIN_VPCOMEQUW,
+ IX86_BUILTIN_VPCOMNEUW,
+ IX86_BUILTIN_VPCOMLTUW,
+ IX86_BUILTIN_VPCOMLEUW,
+ IX86_BUILTIN_VPCOMGTUW,
+ IX86_BUILTIN_VPCOMGEUW,
+ IX86_BUILTIN_VPCOMFALSEUW,
+ IX86_BUILTIN_VPCOMTRUEUW,
+
+ IX86_BUILTIN_VPCOMEQUD,
+ IX86_BUILTIN_VPCOMNEUD,
+ IX86_BUILTIN_VPCOMLTUD,
+ IX86_BUILTIN_VPCOMLEUD,
+ IX86_BUILTIN_VPCOMGTUD,
+ IX86_BUILTIN_VPCOMGEUD,
+ IX86_BUILTIN_VPCOMFALSEUD,
+ IX86_BUILTIN_VPCOMTRUEUD,
+
+ IX86_BUILTIN_VPCOMEQUQ,
+ IX86_BUILTIN_VPCOMNEUQ,
+ IX86_BUILTIN_VPCOMLTUQ,
+ IX86_BUILTIN_VPCOMLEUQ,
+ IX86_BUILTIN_VPCOMGTUQ,
+ IX86_BUILTIN_VPCOMGEUQ,
+ IX86_BUILTIN_VPCOMFALSEUQ,
+ IX86_BUILTIN_VPCOMTRUEUQ,
+
+ IX86_BUILTIN_VPCOMEQB,
+ IX86_BUILTIN_VPCOMNEB,
+ IX86_BUILTIN_VPCOMLTB,
+ IX86_BUILTIN_VPCOMLEB,
+ IX86_BUILTIN_VPCOMGTB,
+ IX86_BUILTIN_VPCOMGEB,
+ IX86_BUILTIN_VPCOMFALSEB,
+ IX86_BUILTIN_VPCOMTRUEB,
+
+ IX86_BUILTIN_VPCOMEQW,
+ IX86_BUILTIN_VPCOMNEW,
+ IX86_BUILTIN_VPCOMLTW,
+ IX86_BUILTIN_VPCOMLEW,
+ IX86_BUILTIN_VPCOMGTW,
+ IX86_BUILTIN_VPCOMGEW,
+ IX86_BUILTIN_VPCOMFALSEW,
+ IX86_BUILTIN_VPCOMTRUEW,
+
+ IX86_BUILTIN_VPCOMEQD,
+ IX86_BUILTIN_VPCOMNED,
+ IX86_BUILTIN_VPCOMLTD,
+ IX86_BUILTIN_VPCOMLED,
+ IX86_BUILTIN_VPCOMGTD,
+ IX86_BUILTIN_VPCOMGED,
+ IX86_BUILTIN_VPCOMFALSED,
+ IX86_BUILTIN_VPCOMTRUED,
+
+ IX86_BUILTIN_VPCOMEQQ,
+ IX86_BUILTIN_VPCOMNEQ,
+ IX86_BUILTIN_VPCOMLTQ,
+ IX86_BUILTIN_VPCOMLEQ,
+ IX86_BUILTIN_VPCOMGTQ,
+ IX86_BUILTIN_VPCOMGEQ,
+ IX86_BUILTIN_VPCOMFALSEQ,
+ IX86_BUILTIN_VPCOMTRUEQ,
+
+ /* LWP instructions. */
+ IX86_BUILTIN_LLWPCB16,
+ IX86_BUILTIN_LLWPCB32,
+ IX86_BUILTIN_LLWPCB64,
+ IX86_BUILTIN_SLWPCB16,
+ IX86_BUILTIN_SLWPCB32,
+ IX86_BUILTIN_SLWPCB64,
+ IX86_BUILTIN_LWPVAL16,
+ IX86_BUILTIN_LWPVAL32,
+ IX86_BUILTIN_LWPVAL64,
+ IX86_BUILTIN_LWPINS16,
+ IX86_BUILTIN_LWPINS32,
+ IX86_BUILTIN_LWPINS64,
+
IX86_BUILTIN_MAX
};
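
Each vpcom* builtin family above covers eight predicates per element type, signed and unsigned: the six orderings plus constant FALSE/TRUE, each producing an all-ones or all-zeros mask per lane. A scalar model of the signed 32-bit lane; this sketches the semantics only, not any builtin's actual expansion:

/* Scalar model of one VPCOM lane (signed 32-bit case): the result is
   ~0 where the predicate holds, 0 where it does not.  FALSE and TRUE
   are the constant predicates.  */
#include <stdint.h>

enum pcom_op { PCOM_EQ, PCOM_NE, PCOM_LT, PCOM_LE, PCOM_GT, PCOM_GE,
               PCOM_FALSE, PCOM_TRUE };

static uint32_t vpcom_lane_d (int32_t a, int32_t b, enum pcom_op op)
{
  int hold;
  switch (op)
    {
    case PCOM_EQ:    hold = a == b; break;
    case PCOM_NE:    hold = a != b; break;
    case PCOM_LT:    hold = a <  b; break;
    case PCOM_LE:    hold = a <= b; break;
    case PCOM_GT:    hold = a >  b; break;
    case PCOM_GE:    hold = a >= b; break;
    case PCOM_FALSE: hold = 0;      break;
    default:         hold = 1;      break;  /* PCOM_TRUE */
    }
  return hold ? ~UINT32_C (0) : 0;
}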
@@ -21052,7 +21323,13 @@ enum ix86_special_builtin_type
VOID_FTYPE_PV8SF_V8SF_V8SF,
VOID_FTYPE_PV4DF_V4DF_V4DF,
VOID_FTYPE_PV4SF_V4SF_V4SF,
- VOID_FTYPE_PV2DF_V2DF_V2DF
+ VOID_FTYPE_PV2DF_V2DF_V2DF,
+ VOID_FTYPE_USHORT_UINT_USHORT,
+ VOID_FTYPE_UINT_UINT_UINT,
+ VOID_FTYPE_UINT64_UINT_UINT,
+ UCHAR_FTYPE_USHORT_UINT_USHORT,
+ UCHAR_FTYPE_UINT_UINT_UINT,
+ UCHAR_FTYPE_UINT64_UINT_UINT
};
/* Builtin types */
@@ -21299,6 +21576,22 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
};
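The LWP entries above register scalar-typed builtins, so they can be called directly from C once -mlwp is enabled. A minimal sketch of such a call (the data and flags values here are arbitrary placeholders, not from the patch):

  void
  profile_tick (unsigned int data1)
  {
    /* Insert a programmed-value event into the LWP ring buffer.  */
    __builtin_ia32_lwpval32 (0 /* data2 */, data1, 0 /* flags */);
  }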
/* Builtins with variable number of arguments. */
@@ -21912,13 +22205,58 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
-/* FMA4. */
+/* FMA4 and XOP. */
enum multi_arg_type {
MULTI_ARG_UNKNOWN,
MULTI_ARG_3_SF,
MULTI_ARG_3_DF,
MULTI_ARG_3_SF2,
- MULTI_ARG_3_DF2
+ MULTI_ARG_3_DF2,
+ MULTI_ARG_3_DI,
+ MULTI_ARG_3_SI,
+ MULTI_ARG_3_SI_DI,
+ MULTI_ARG_3_HI,
+ MULTI_ARG_3_HI_SI,
+ MULTI_ARG_3_QI,
+ MULTI_ARG_3_DI2,
+ MULTI_ARG_3_SI2,
+ MULTI_ARG_3_HI2,
+ MULTI_ARG_3_QI2,
+ MULTI_ARG_2_SF,
+ MULTI_ARG_2_DF,
+ MULTI_ARG_2_DI,
+ MULTI_ARG_2_SI,
+ MULTI_ARG_2_HI,
+ MULTI_ARG_2_QI,
+ MULTI_ARG_2_DI_IMM,
+ MULTI_ARG_2_SI_IMM,
+ MULTI_ARG_2_HI_IMM,
+ MULTI_ARG_2_QI_IMM,
+ MULTI_ARG_2_DI_CMP,
+ MULTI_ARG_2_SI_CMP,
+ MULTI_ARG_2_HI_CMP,
+ MULTI_ARG_2_QI_CMP,
+ MULTI_ARG_2_DI_TF,
+ MULTI_ARG_2_SI_TF,
+ MULTI_ARG_2_HI_TF,
+ MULTI_ARG_2_QI_TF,
+ MULTI_ARG_2_SF_TF,
+ MULTI_ARG_2_DF_TF,
+ MULTI_ARG_1_SF,
+ MULTI_ARG_1_DF,
+ MULTI_ARG_1_SF2,
+ MULTI_ARG_1_DF2,
+ MULTI_ARG_1_DI,
+ MULTI_ARG_1_SI,
+ MULTI_ARG_1_HI,
+ MULTI_ARG_1_QI,
+ MULTI_ARG_1_SI_DI,
+ MULTI_ARG_1_HI_DI,
+ MULTI_ARG_1_HI_SI,
+ MULTI_ARG_1_QI_DI,
+ MULTI_ARG_1_QI_SI,
+ MULTI_ARG_1_QI_HI
};
static const struct builtin_description bdesc_multi_arg[] =
@@ -21959,7 +22297,160 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
- { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
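Every vpcmov row maps to the same bit-wise select; only the vector mode differs between the table entries. A scalar model of the operation, as a sketch (operand order assumed from the pcmov pattern: a set selector bit takes the bit from the first source):

  static inline unsigned long long
  pcmov_bits (unsigned long long src1, unsigned long long src2,
              unsigned long long sel)
  {
    return (src1 & sel) | (src2 & ~sel);
  }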
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
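The rotate and shift entries take a per-element count in the second vector operand, with the sign of the count selecting the direction. One 32-bit lane of vprotd modelled in C (an illustrative sketch of the assumed semantics):

  static inline unsigned int
  protd_lane (unsigned int x, int count)
  {
    unsigned int c = (unsigned int) (count < 0 ? -count : count) & 31;
    return count < 0 ? (x >> c) | (x << ((32 - c) & 31))
                     : (x << c) | (x >> ((32 - c) & 31));
  }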
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
};
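Note how the comparison rows reuse one maskcmp pattern per mode and carry the condition in the rtx_code column, and how each *neq* spelling is a deliberate alias for the corresponding IX86_BUILTIN_VPCOMNE* code. What one lane of an unsigned compare such as vpcomltub produces, as a scalar sketch:

  static inline unsigned char
  comltub_lane (unsigned char a, unsigned char b)
  {
    return a < b ? 0xff : 0x00;   /* all-ones mask when the test holds */
  }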
@@ -22341,51 +22832,6 @@ ix86_init_mmx_sse_builtins (void)
integer_type_node,
NULL_TREE);
-
- tree v2di_ftype_v2di
- = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
-
- tree v16qi_ftype_v8hi_v8hi
- = build_function_type_list (V16QI_type_node,
- V8HI_type_node, V8HI_type_node,
- NULL_TREE);
- tree v8hi_ftype_v4si_v4si
- = build_function_type_list (V8HI_type_node,
- V4SI_type_node, V4SI_type_node,
- NULL_TREE);
- tree v8hi_ftype_v16qi_v16qi
- = build_function_type_list (V8HI_type_node,
- V16QI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v4hi_ftype_v8qi_v8qi
- = build_function_type_list (V4HI_type_node,
- V8QI_type_node, V8QI_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_uchar
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_char_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_ushort
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- short_unsigned_type_node,
- NULL_TREE);
- tree unsigned_ftype_unsigned_unsigned
- = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_type_node,
- NULL_TREE);
- tree uint64_ftype_uint64_uint64
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
- tree float_ftype_float
- = build_function_type_list (float_type_node,
- float_type_node,
- NULL_TREE);
-
/* AVX builtins */
tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
V32QImode);
@@ -22397,6 +22843,8 @@ ix86_init_mmx_sse_builtins (void)
V4DImode);
tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
V4DFmode);
+ tree V16HI_type_node = build_vector_type_for_mode (intHI_type_node,
+ V16HImode);
tree v8sf_ftype_v8sf
= build_function_type_list (V8SF_type_node,
V8SF_type_node,
@@ -22641,6 +23089,138 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DI_type_node, NULL_TREE);
+ /* XOP instructions */
+ tree v2di_ftype_v2di_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v4di_ftype_v4di_v4di_v4di
+ = build_function_type_list (V4DI_type_node,
+ V4DI_type_node,
+ V4DI_type_node,
+ V4DI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v8si_ftype_v8si_v8si_v8si
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node,
+ V8SI_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+
+ tree v32qi_ftype_v32qi_v32qi_v32qi
+ = build_function_type_list (V32QI_type_node,
+ V32QI_type_node,
+ V32QI_type_node,
+ V32QI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v2di
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ NULL_TREE);
+
+ tree v16hi_ftype_v16hi_v16hi_v16hi
+ = build_function_type_list (V16HI_type_node,
+ V16HI_type_node,
+ V16HI_type_node,
+ V16HI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v4si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di_si
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v16qi_ftype_v16qi_si
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
+
+ tree v16qi_ftype_v8hi_v8hi
+ = build_function_type_list (V16QI_type_node,
+ V8HI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v4si_v4si
+ = build_function_type_list (V8HI_type_node,
+ V4SI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v16qi_v16qi
+ = build_function_type_list (V8HI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4hi_ftype_v8qi_v8qi
+ = build_function_type_list (V4HI_type_node,
+ V8QI_type_node, V8QI_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_uchar
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_char_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_ushort
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_unsigned
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+ tree uint64_ftype_uint64_uint64
+ = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
+ tree float_ftype_float
+ = build_function_type_list (float_type_node,
+ float_type_node,
+ NULL_TREE);
+
/* Integer intrinsics. */
tree uint64_ftype_void
= build_function_type (long_long_unsigned_type_node,
@@ -22670,6 +23250,50 @@ ix86_init_mmx_sse_builtins (void)
integer_type_node,
NULL_TREE);
+ /* LWP instructions. */
+
+ tree void_ftype_ushort_unsigned_ushort
+ = build_function_type_list (void_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_ushort_unsigned_ushort
+ = build_function_type_list (unsigned_char_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
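Rendered as C prototypes, the six trees just built are (the names are illustrative; only the signatures come from the code above):

  void          lwpval16_sig (unsigned short, unsigned int, unsigned short);
  void          lwpval32_sig (unsigned int, unsigned int, unsigned int);
  void          lwpval64_sig (unsigned long long, unsigned int, unsigned int);
  unsigned char lwpins16_sig (unsigned short, unsigned int, unsigned short);
  unsigned char lwpins32_sig (unsigned int, unsigned int, unsigned int);
  unsigned char lwpins64_sig (unsigned long long, unsigned int, unsigned int);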
tree ftype;
/* Add all special builtins with variable number of operands. */
@@ -22783,6 +23407,25 @@ ix86_init_mmx_sse_builtins (void)
case VOID_FTYPE_PV2DF_V2DF_V2DF:
type = void_ftype_pv2df_v2df_v2df;
break;
+ case VOID_FTYPE_USHORT_UINT_USHORT:
+ type = void_ftype_ushort_unsigned_ushort;
+ break;
+ case VOID_FTYPE_UINT_UINT_UINT:
+ type = void_ftype_unsigned_unsigned_unsigned;
+ break;
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ type = void_ftype_uint64_unsigned_unsigned;
+ break;
+ case UCHAR_FTYPE_USHORT_UINT_USHORT:
+ type = uchar_ftype_ushort_unsigned_ushort;
+ break;
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ type = uchar_ftype_unsigned_unsigned_unsigned;
+ break;
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ type = uchar_ftype_uint64_unsigned_unsigned;
+ break;
+
default:
gcc_unreachable ();
}
@@ -23409,6 +24052,50 @@ ix86_init_mmx_sse_builtins (void)
case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
case MULTI_ARG_3_SF2: mtype = v8sf_ftype_v8sf_v8sf_v8sf; break;
case MULTI_ARG_3_DF2: mtype = v4df_ftype_v4df_v4df_v4df; break;
+ case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
+ case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
+ case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
+ case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
+ case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
+ case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
+ case MULTI_ARG_3_DI2: mtype = v4di_ftype_v4di_v4di_v4di; break;
+ case MULTI_ARG_3_SI2: mtype = v8si_ftype_v8si_v8si_v8si; break;
+ case MULTI_ARG_3_HI2: mtype = v16hi_ftype_v16hi_v16hi_v16hi; break;
+ case MULTI_ARG_3_QI2: mtype = v32qi_ftype_v32qi_v32qi_v32qi; break;
+ case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
+ case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
+ case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
+ case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
+ case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
+ case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
+ case MULTI_ARG_1_SF2: mtype = v8sf_ftype_v8sf; break;
+ case MULTI_ARG_1_DF2: mtype = v4df_ftype_v4df; break;
+ case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
+ case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
+ case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
+ case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
+ case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
+ case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
+ case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
+ case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
case MULTI_ARG_UNKNOWN:
default:
@@ -23628,9 +24315,71 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
case MULTI_ARG_3_DF:
case MULTI_ARG_3_SF2:
case MULTI_ARG_3_DF2:
+ case MULTI_ARG_3_DI:
+ case MULTI_ARG_3_SI:
+ case MULTI_ARG_3_SI_DI:
+ case MULTI_ARG_3_HI:
+ case MULTI_ARG_3_HI_SI:
+ case MULTI_ARG_3_QI:
+ case MULTI_ARG_3_DI2:
+ case MULTI_ARG_3_SI2:
+ case MULTI_ARG_3_HI2:
+ case MULTI_ARG_3_QI2:
nargs = 3;
break;
+ case MULTI_ARG_2_SF:
+ case MULTI_ARG_2_DF:
+ case MULTI_ARG_2_DI:
+ case MULTI_ARG_2_SI:
+ case MULTI_ARG_2_HI:
+ case MULTI_ARG_2_QI:
+ nargs = 2;
+ break;
+
+ case MULTI_ARG_2_DI_IMM:
+ case MULTI_ARG_2_SI_IMM:
+ case MULTI_ARG_2_HI_IMM:
+ case MULTI_ARG_2_QI_IMM:
+ nargs = 2;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_1_SF:
+ case MULTI_ARG_1_DF:
+ case MULTI_ARG_1_SF2:
+ case MULTI_ARG_1_DF2:
+ case MULTI_ARG_1_DI:
+ case MULTI_ARG_1_SI:
+ case MULTI_ARG_1_HI:
+ case MULTI_ARG_1_QI:
+ case MULTI_ARG_1_SI_DI:
+ case MULTI_ARG_1_HI_DI:
+ case MULTI_ARG_1_HI_SI:
+ case MULTI_ARG_1_QI_DI:
+ case MULTI_ARG_1_QI_SI:
+ case MULTI_ARG_1_QI_HI:
+ nargs = 1;
+ break;
+
+ case MULTI_ARG_2_DI_CMP:
+ case MULTI_ARG_2_SI_CMP:
+ case MULTI_ARG_2_HI_CMP:
+ case MULTI_ARG_2_QI_CMP:
+ nargs = 2;
+ comparison_p = true;
+ break;
+
+ case MULTI_ARG_2_SF_TF:
+ case MULTI_ARG_2_DF_TF:
+ case MULTI_ARG_2_DI_TF:
+ case MULTI_ARG_2_SI_TF:
+ case MULTI_ARG_2_HI_TF:
+ case MULTI_ARG_2_QI_TF:
+ nargs = 2;
+ tf_p = true;
+ break;
+
case MULTI_ARG_UNKNOWN:
default:
gcc_unreachable ();
@@ -24568,6 +25317,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case VOID_FTYPE_USHORT_UINT_USHORT:
+ case VOID_FTYPE_UINT_UINT_UINT:
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ case UCHAR_FTYPE_USHORT_UINT_USHORT:
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ nargs = 3;
+ klass = store;
+ memory = 0;
+ break;
default:
gcc_unreachable ();
}
@@ -25311,7 +26070,7 @@ static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
bool sqrt ATTRIBUTE_UNUSED)
{
- if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations))
return NULL_TREE;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b412604dbd8..4bc8ef18500 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_FMA4 OPTION_ISA_FMA4
+#define TARGET_XOP OPTION_ISA_XOP
+#define TARGET_LWP OPTION_ISA_LWP
#define TARGET_ROUND OPTION_ISA_ROUND
#define TARGET_ABM OPTION_ISA_ABM
#define TARGET_POPCNT OPTION_ISA_POPCNT
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index dc605abde06..82f5352597c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -57,6 +57,7 @@
;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts
+;; Y -- print condition for XOP pcom* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
@@ -199,6 +200,15 @@
(UNSPEC_FMA4_INTRINSIC 150)
(UNSPEC_FMA4_FMADDSUB 151)
(UNSPEC_FMA4_FMSUBADD 152)
+ (UNSPEC_XOP_UNSIGNED_CMP 151)
+ (UNSPEC_XOP_TRUEFALSE 152)
+ (UNSPEC_XOP_PERMUTE 153)
+ (UNSPEC_FRCZ 154)
+ (UNSPEC_LLWP_INTRINSIC 155)
+ (UNSPEC_SLWP_INTRINSIC 156)
+ (UNSPECV_LWPVAL_INTRINSIC 157)
+ (UNSPECV_LWPINS_INTRINSIC 158)
+
; For AES support
(UNSPEC_AESENC 159)
(UNSPEC_AESENCLAST 160)
@@ -254,6 +264,20 @@
(COM_TRUE_P 5)
])
+;; Constants used in the XOP pperm instruction
+(define_constants
+ [(PPERM_SRC 0x00) /* copy source */
+ (PPERM_INVERT 0x20) /* invert source */
+ (PPERM_REVERSE 0x40) /* bit reverse source */
+ (PPERM_REV_INV 0x60) /* bit reverse & invert src */
+ (PPERM_ZERO 0x80) /* all 0's */
+ (PPERM_ONES 0xa0) /* all 1's */
+ (PPERM_SIGN 0xc0) /* propagate sign bit */
+ (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
+ (PPERM_SRC1 0x00) /* use first source byte */
+ (PPERM_SRC2 0x10) /* use second source byte */
+ ])
+
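The pperm control byte splits into a byte index in the low nibble, a source select in bit 4, and an operation in bits 5-7, which is what the constants above encode. A helper one could write against them (PPERM_BYTE is hypothetical, not part of the patch):

  /* Compose one pperm selector byte: operation | source | byte index.  */
  #define PPERM_BYTE(op, src, idx)  ((op) | (src) | ((idx) & 0x0f))
  /* PPERM_BYTE (PPERM_INVERT, PPERM_SRC2, 3) selects byte 3 of the
     second source and inverts it.  */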
;; Registers by name.
(define_constants
[(AX_REG 0)
@@ -333,7 +357,7 @@
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
- ssemuladd,sse4arg,
+ ssemuladd,sse4arg,lwp,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -19676,6 +19700,20 @@
[(set_attr "type" "fcmov")
(set_attr "mode" "XF")])
+;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
+;; XOP conditional move
+(define_insn "*xop_pcmov_<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (if_then_else:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "register_operand" "x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+ [(set_attr "type" "sse4arg")])
+
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
@@ -19985,6 +20023,18 @@
DONE;
})
+;; Use IOR for stack probes; it is shorter to encode.
+(define_expand "probe_stack"
+ [(match_operand 0 "memory_operand" "")]
+ ""
+{
+ if (GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx));
+ else
+ emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx));
+ DONE;
+})
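OR with a zero constant is a read-modify-write, so the probe faults on an unmapped guard page while leaving the probed word unchanged. A C model of one probe (a sketch only; the real memory operand is supplied by the generic stack-checking code):

  static inline void
  probe_word (volatile int *p)
  {
    *p |= 0;   /* emitted as or $0, (mem): touches the page, value intact */
  }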
+
(define_expand "builtin_setjmp_receiver"
[(label_ref (match_operand 0 "" ""))]
"!TARGET_64BIT && flag_pic"
@@ -20488,7 +20538,9 @@
[(match_dup 0)
(match_operand:SI 1 "nonmemory_operand" "")]))
(clobber (reg:CC FLAGS_REG))])]
- "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
[(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 2) (match_dup 1)]))
@@ -20503,7 +20555,9 @@
[(match_operand:SI 1 "nonmemory_operand" "")
(match_dup 0)]))
(clobber (reg:CC FLAGS_REG))])]
- "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
[(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
@@ -21252,19 +21306,19 @@
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))
(unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
- (clobber (reg:TI 27))
- (clobber (reg:TI 28))
- (clobber (reg:TI 45))
- (clobber (reg:TI 46))
- (clobber (reg:TI 47))
- (clobber (reg:TI 48))
- (clobber (reg:TI 49))
- (clobber (reg:TI 50))
- (clobber (reg:TI 51))
- (clobber (reg:TI 52))
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))]
- "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
{
if (constant_call_address_operand (operands[1], Pmode))
return "call\t%P1";
@@ -21303,14 +21357,14 @@
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(reg:DI 21)
- (reg:DI 22)
- (reg:DI 23)
- (reg:DI 24)
- (reg:DI 25)
- (reg:DI 26)
- (reg:DI 27)
- (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
(use (label_ref:DI (match_operand 3 "" "")))])]
@@ -21320,14 +21374,14 @@
(define_insn "*sse_prologue_save_insn"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
(match_operand:DI 4 "const_int_operand" "n")))
- (unspec:BLK [(reg:DI 21)
- (reg:DI 22)
- (reg:DI 23)
- (reg:DI 24)
- (reg:DI 25)
- (reg:DI 26)
- (reg:DI 27)
- (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))]
@@ -21804,6 +21858,120 @@
[(set_attr "type" "other")
(set_attr "length" "3")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; LWP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "lwp_llwpcbhi1"
+ [(unspec [(match_operand:HI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_llwpcbsi1"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_llwpcbdi1"
+ [(unspec [(match_operand:DI 0 "register_operand" "r")]
+ UNSPEC_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_slwpcbhi1"
+ [(unspec [(match_operand:HI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_slwpcbsi1"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_slwpcbdi1"
+ [(unspec [(match_operand:DI 0 "register_operand" "r")]
+ UNSPEC_SLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_lwpvalhi3"
+ [(unspec_volatile [(match_operand:HI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:HI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_lwpvalsi3"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_lwpvaldi3"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
+(define_insn "lwp_lwpinshi3"
+ [(unspec_volatile [(match_operand:HI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:HI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "HI")])
+
+(define_insn "lwp_lwpinssi3"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "SI")])
+
+(define_insn "lwp_lwpinsdi3"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_LWPINS_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "DI")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9668ff6504d..dd47b7d1dc5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -314,6 +314,14 @@ mfma4
Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save
Support FMA4 built-in functions and code generation
+mxop
+Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save
+Support XOP built-in functions and code generation
+
+mlwp
+Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save
+Support LWP built-in functions and code generation
+
mabm
Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
Support code generation of Advanced Bit Manipulation (ABM) instructions.
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index e701b19e2a8..540bc3f09ee 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -49,6 +49,7 @@ __bswapd (int __X)
return __builtin_bswap32 (__X);
}
+#ifdef __SSE4_2__
/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -70,6 +71,7 @@ __crc32d (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
+#endif /* SSE4.2 */
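With the new guard, the CRC32 wrappers are only declared when the builtins they call exist, so the header no longer breaks compilation without -msse4.2. Under -msse4.2 the wrappers work as before (illustrative):

  unsigned int
  crc_step (unsigned int crc, unsigned int word)
  {
    return __crc32d (crc, word);
  }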
/* 32bit popcnt */
extern __inline int
diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h
index 5e2e0136fcb..5d8e5ad2cbe 100644
--- a/gcc/config/i386/linux.h
+++ b/gcc/config/i386/linux.h
@@ -207,6 +207,9 @@ along with GCC; see the file COPYING3. If not see
#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index cfa3f49e870..d07547a804f 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -110,6 +110,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
/* This macro may be overridden in i386/k*bsd-gnu.h. */
#define REG_NAME(reg) reg
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
new file mode 100644
index 00000000000..e5137ec24f4
--- /dev/null
+++ b/gcc/config/i386/lwpintrin.h
@@ -0,0 +1,109 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _LWPINTRIN_H_INCLUDED
+#define _LWPINTRIN_H_INCLUDED
+
+#ifndef __LWP__
+# error "LWP instruction set not enabled"
+#else
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb16 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb16 (pcbAddress);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb32 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb32 (pcbAddress);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb64 (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb64 (pcbAddress);
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb16 (void)
+{
+ return __builtin_ia32_slwpcb16 ();
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb32 (void)
+{
+ return __builtin_ia32_slwpcb32 ();
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb64 (void)
+{
+ return __builtin_ia32_slwpcb64 ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval16 (unsigned short data2, unsigned int data1, unsigned short flags)
+{
+ __builtin_ia32_lwpval16 (data2, data1, flags);
+}
+/*
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval32 (data2, data1, flags);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval64 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins16 (unsigned short data2, unsigned int data1, unsigned short flags)
+{
+ return __builtin_ia32_lwpins16 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins32 (data2, data1, flags);
+}
+
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins64 (data2, data1, flags);
+}
+*/
+#endif /* __LWP__ */
+
+#endif /* _LWPINTRIN_H_INCLUDED */
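A usage sketch for the new header; of the value/insert group only __lwpval16 is live in this revision, the rest being commented out above, presumably pending builtin support (an assumption). Assumes compilation with -mlwp:

  #include <x86intrin.h>

  void
  lwp_event (unsigned int data1)
  {
    __lwpval16 (0 /* data2 */, data1, 0 /* flags */);
  }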
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e90296512ad..bad39bb69c8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -86,6 +86,9 @@
(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
+;; Mapping of vector modes to the maximum immediate rotate count for XOP
+(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
+
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
(V16QI "QI") (V8HI "HI")
@@ -1455,7 +1458,8 @@
(match_operator:SSEMODEF4 3 "sse_comparison_operator"
[(match_operand:SSEMODEF4 1 "register_operand" "0")
(match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
+ "!TARGET_XOP
+ && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
"cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -5614,7 +5618,7 @@
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
- if (TARGET_SSE4_1)
+ if (TARGET_SSE4_1 || TARGET_XOP)
ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
@@ -5643,7 +5647,7 @@
[(set (match_operand:V4SI 0 "register_operand" "")
(mult:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
- "TARGET_SSE2 && !TARGET_SSE4_1
+ "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP
&& can_create_pseudo_p ()"
"#"
"&& 1"
@@ -5705,6 +5709,42 @@
rtx t1, t2, t3, t4, t5, t6, thirtytwo;
rtx op0, op1, op2;
+ if (TARGET_XOP)
+ {
+ /* op1: A,B,C,D, op2: E,F,G,H */
+ op0 = operands[0];
+ op1 = gen_lowpart (V4SImode, operands[1]);
+ op2 = gen_lowpart (V4SImode, operands[2]);
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+
+ /* t1: B,A,D,C */
+ emit_insn (gen_sse2_pshufd_1 (t1, op1,
+ GEN_INT (1),
+ GEN_INT (0),
+ GEN_INT (3),
+ GEN_INT (2)));
+
+ /* t2: 0 */
+ emit_move_insn (t2, CONST0_RTX (V4SImode));
+
+ /* t3: (B*E),(A*F),(D*G),(C*H) */
+ emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
+
+ /* t4: (B*E)+(A*F), (D*G)+(C*H) */
+ emit_insn (gen_xop_phadddq (t4, t3));
+
+ /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
+ emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
+
+ /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
+ emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5));
+ DONE;
+ }
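For one 64-bit lane this is the standard low-half decomposition of a full multiply; a scalar C model of the five steps above:

  /* With a = ahi:alo and b = bhi:blo in 32-bit halves, the low 64 bits
     of a*b are ((ahi*blo + alo*bhi) << 32) + alo*blo, matching the
     pmacsdd, phadddq, shift and pmacsdql steps.  */
  static inline unsigned long long
  mulv2di_lane (unsigned long long a, unsigned long long b)
  {
    unsigned long long alo = (unsigned int) a, ahi = a >> 32;
    unsigned long long blo = (unsigned int) b, bhi = b >> 32;
    return ((ahi * blo + alo * bhi) << 32) + alo * blo;
  }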
+
op0 = operands[0];
op1 = operands[1];
op2 = operands[2];
@@ -5820,6 +5860,56 @@
DONE;
})
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ DONE;
+})
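Both widening expanders rely on the same reshuffle: pshufd with selector (0,2,1,3) places elements 0 and 2 in the first qword lane and elements 1 and 3 in the second, so the _low qword multiply sees elements 0 and 1 and the _high one elements 2 and 3 (as suggested by the pattern names). The shuffle itself, modelled in C:

  static inline void
  pshufd_0213 (const unsigned int s[4], unsigned int d[4])
  {
    d[0] = s[0]; d[1] = s[2];   /* first qword lane: elements 0 and 2 */
    d[2] = s[1]; d[3] = s[3];   /* second qword lane: elements 1 and 3 */
  }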
+
(define_expand "vec_widen_umult_hi_v4si"
[(match_operand:V2DI 0 "register_operand" "")
(match_operand:V4SI 1 "register_operand" "")
@@ -6217,7 +6307,7 @@
(eq:SSEMODE124
(match_operand:SSEMODE124 1 "nonimmediate_operand" "")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_XOP "
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*avx_eq<mode>3"
@@ -6240,7 +6330,7 @@
(eq:SSEMODE124
(match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2
+ "TARGET_SSE2 && !TARGET_XOP
&& ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
@@ -6286,7 +6376,7 @@
(gt:SSEMODE124
(match_operand:SSEMODE124 1 "register_operand" "0")
(match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_XOP"
"pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
(set_attr "prefix_data16" "1")
@@ -10364,6 +10454,1445 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; XOP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; XOP parallel integer multiply/add instructions.
+;; Note that the instruction does not allow the value being added to be
+;; a memory operand.  However, by pretending via the nonimmediate_operand
+;; predicate that it is and splitting such insns later, the following
+;; can be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "xop_pmacsww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (plus:V8HI
+ (mult:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
+ (match_operand:V8HI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsww with two memory operands into a load and the pmacsww.
+(define_split
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" ""))
+ (match_operand:V8HI 3 "nonimmediate_operand" "")))]
+ "TARGET_XOP
+ && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
+ emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
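+;; For illustration, a loop of the shape the note above mentions; with
+;; the vectorizer active and XOP enabled (e.g. -O3 -mxop, an assumed
+;; command line) it can be reduced to one vpmacsww per vector step:
+;;
+;;   void mac16 (short *a, const short *b, const short *c,
+;;               const short *d, int n)
+;;   {
+;;     for (int i = 0; i < n; i++)
+;;       a[i] = b[i] * c[i] + d[i];
+;;   }
+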
+(define_insn "xop_pmacssww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (ss_plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V8HI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Note that the instruction does not allow the value being added to be
+;; a memory operand.  However, pretending via the nonimmediate_operand
+;; predicate that it does, and splitting it later, allows the following
+;; to be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "xop_pmacsdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsdd with two memory operands into a load and the pmacsdd.
+(define_split
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:V4SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4SI 3 "nonimmediate_operand" "")))]
+ "TARGET_XOP
+ && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
+ emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
+(define_insn "xop_pmacssdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+	  (sign_extend:V2DI
+	    (vec_select:V2SI
+	      (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+	      (parallel [(const_int 1)
+			 (const_int 3)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*xop_pmacsdql_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP,
+;; so fake it with a multiply/add.  In general, we expect the
+;; define_split to occur before register allocation, so we have to
+;; handle the corner case where the target is the same as either
+;; operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_low"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*xop_pmacsdqh_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP,
+;; so fake it with a multiply/add.  In general, we expect the
+;; define_split to occur before register allocation, so we have to
+;; handle the corner case where the target is the same as either
+;; operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_high"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
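+;; Scalar model of the dword-to-qword multiply-accumulate family above
+;; (one 32-bit element is taken from each 64-bit lane; which element is
+;; taken distinguishes the "l" and "h" forms):
+;;
+;;   long long macsdq (int a, int b, long long acc)
+;;   {
+;;     return (long long) a * b + acc;   /* pmacssdq* saturates the add */
+;;   }
+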
+;; XOP parallel integer multiply/add instructions for the intrinsics.
+(define_insn "xop_pmacsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "x,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; XOP parallel XMM conditional moves
+(define_insn "xop_pcmov_<mode>"
+ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
+ (if_then_else:SSEMODE
+ (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,xm")
+ (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,x")
+ (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "@
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
+(define_insn "xop_pcmov_<mode>256"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
+ (if_then_else:AVX256MODE
+ (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,xm")
+ (match_operand:AVX256MODE 1 "vector_move_operand" "x,xm,x")
+ (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "@
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
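+;; Bitwise C model of vpcmov, following the RTL above: selector bits
+;; that are set take operand 1, clear bits take operand 2:
+;;
+;;   unsigned long long pcmov (unsigned long long a, unsigned long long b,
+;;                             unsigned long long sel)
+;;   {
+;;     return (a & sel) | (b & ~sel);
+;;   }
+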
+;; XOP horizontal add/subtract instructions
+(define_insn "xop_phaddbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
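+;; C model of the widening horizontal add above: adjacent signed bytes
+;; are summed into words (a sketch assuming <stdint.h> types):
+;;
+;;   void phaddbw (int16_t r[8], const int8_t s[16])
+;;   {
+;;     for (int i = 0; i < 8; i++)
+;;       r[i] = (int16_t) s[2 * i] + s[2 * i + 1];
+;;   }
+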
+(define_insn "xop_phaddbd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddbq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphaddwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphaddwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadddq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphadddq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddubw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddubd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddubq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphadduwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphadduwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddudq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphaddudq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (minus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphsubbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (minus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphsubwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (minus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphsubdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+;; XOP permute instructions
+(define_insn "xop_pperm"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
+ UNSPEC_XOP_PERMUTE))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; XOP pack instructions that combine two vectors into a smaller vector
+(define_insn "xop_pperm_pack_v2di_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (vec_concat:V4SI
+ (truncate:V2SI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V2SI
+ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v4si_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v8hi_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
+ (vec_concat:V16QI
+ (truncate:V8QI
+ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,xm"))
+ (truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; XOP packed rotate instructions
+(define_expand "rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+  /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+ }
+})
+
+(define_expand "rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+  /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_neg<mode>2 (neg, reg));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
+})
+
+(define_insn "xop_rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+{
+ operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
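+;; The rotate-right pattern above is printed as a rotate-left by the
+;; complemented count, relying on the usual identity (C sketch for one
+;; 32-bit element, valid for 0 < n < 32):
+;;
+;;   unsigned rotr32 (unsigned x, unsigned n)
+;;   {
+;;     return (x >> n) | (x << (32 - n));   /* == rotate left by 32-n */
+;;   }
+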
+(define_expand "vrotr<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (reg, operands[2]));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "vrotl<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "xop_vrotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (rotatert:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+;; XOP packed shift instructions.
+;; FIXME: add V2DI back in
+(define_expand "vlshr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashl<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "xop_ashl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (ashiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_lshl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (lshiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
+ "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
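+;; Element-wise C model of the two variable shifts above (a sketch
+;; assuming the per-element count is signed, negative counts shift
+;; right, and signed >> is an arithmetic shift):
+;;
+;;   int psha32 (int x, int n)
+;;   {
+;;     return n >= 0 ? x << n : x >> -n;   /* arithmetic right shift */
+;;   }
+;;
+;;   unsigned pshl32 (unsigned x, int n)
+;;   {
+;;     return n >= 0 ? x << n : x >> -n;   /* logical right shift */
+;;   }
+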
+;; SSE2 doesn't have some shift variants, so define versions for XOP.
+(define_expand "ashlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "lshlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "ashrv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ rtx ele = ((CONST_INT_P (operands[2]))
+ ? GEN_INT (- INTVAL (operands[2]))
+ : operands[2]);
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = ele;
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+
+ DONE;
+})
+
+(define_expand "ashrv2di3"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (2);
+ rtx par = gen_rtx_PARALLEL (V2DImode, vs);
+ rtx reg = gen_reg_rtx (V2DImode);
+ rtx ele;
+
+ if (CONST_INT_P (operands[2]))
+ ele = GEN_INT (- INTVAL (operands[2]));
+ else if (GET_MODE (operands[2]) != DImode)
+ {
+ rtx move = gen_reg_rtx (DImode);
+ ele = gen_reg_rtx (DImode);
+ convert_move (move, operands[2], false);
+ emit_insn (gen_negdi2 (ele, move));
+ }
+ else
+ {
+ ele = gen_reg_rtx (DImode);
+ emit_insn (gen_negdi2 (ele, operands[2]));
+ }
+
+ RTVEC_ELT (vs, 0) = ele;
+ RTVEC_ELT (vs, 1) = ele;
+ emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+;; XOP FRCZ support
+;; parallel insns
+(define_insn "xop_frcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_XOP"
+ "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+;; scalar insns
+(define_insn "xop_vmfrcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_XOP"
+ "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xop_frcz<mode>2256"
+ [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
+ (unspec:FMA4MODEF4
+ [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_XOP"
+ "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
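+;; A plausible scalar model of FRCZ (an assumption here: the integral
+;; part is removed by truncation; see the ISA manual for the exact
+;; rounding behavior):
+;;
+;;   double frcz (double x)
+;;   {
+;;     return x - __builtin_trunc (x);   /* fractional part of x */
+;;   }
+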
+(define_insn "xop_maskcmp<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_maskcmp_uns<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
+;; and pcomneu* not to be converted to the signed ones in case somebody needs
+;; the exact instruction generated for the intrinsic.
+(define_insn "xop_maskcmp_uns2<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
+ UNSPEC_XOP_UNSIGNED_CMP))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Pcomtrue and pcomfalse support. These are useless instructions, but are
+;; being added here to be complete.
+(define_insn "xop_pcom_tf<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_XOP_TRUEFALSE))]
+ "TARGET_XOP"
+{
+ return ((INTVAL (operands[3]) != 0)
+ ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "*avx_aesenc"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 7bc47f8f15d..ac7e21fd6f7 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -54,10 +54,6 @@
#include <smmintrin.h>
#endif
-#ifdef __FMA4__
-#include <fma4intrin.h>
-#endif
-
#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
#endif
@@ -69,4 +65,16 @@
#include <mm3dnow.h>
#endif
+#ifdef __FMA4__
+#include <fma4intrin.h>
+#endif
+
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+
+#ifdef __LWP__
+#include <lwpintrin.h>
+#endif
+
#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h
new file mode 100644
index 00000000000..803417a6a45
--- /dev/null
+++ b/gcc/config/i386/xopintrin.h
@@ -0,0 +1,771 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _XOPMMINTRIN_H_INCLUDED
+#define _XOPMMINTRIN_H_INCLUDED
+
+#ifndef __XOP__
+# error "XOP instruction set not enabled"
+#else
+
+#include <fma4intrin.h>
+
+/* Integer multiply/add instructions.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
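+/* Usage sketch: with XOP enabled, the statement
+     __m128i r = _mm_macc_epi16 (b, c, d);
+   yields r[i] = b[i] * c[i] + d[i] in each of the eight signed 16-bit
+   lanes with ordinary wraparound; _mm_maccs_epi16 saturates instead.  */
+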
+/* Packed Integer Horizontal Add and Subtract */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
+}
+
+/* Vector conditional move and permute */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
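+/* Selection-rule sketch for _mm_perm_epi8, assuming control bytes with
+   the top three bits clear (plain byte moves):
+     for (i = 0; i < 16; i++)
+       r[i] = (c[i] & 0x10) ? b[c[i] & 0x0f] : a[c[i] & 0x0f];
+   where a, b, c and r view the vectors as arrays of 16 unsigned bytes.  */
+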
+/* Packed Integer Rotates and Shifts
+ Rotates - Non-Immediate form */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Rotates - Immediate form */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi8(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi16(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi32(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi64(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
+}
+#else
+#define _mm_roti_epi8(A, N) \
+ ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+ ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+ ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+ ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
+#endif
+
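+/* Example: rotate each 32-bit lane left by 8 bits.  The count must be
+   a compile-time constant in either form, since it ends up as the
+   builtin's immediate operand:
+     __m128i r = _mm_roti_epi32 (x, 8);  */
+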
+/* Shifts */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Compare and Predicate Generation
+   pcom (integer, unsigned bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
+}
+
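+/* Each comparison above returns a per-element mask in the usual SSE
+   style: all bits of a lane are set where the predicate holds and
+   clear where it does not.  Example:
+     __m128i m = _mm_comlt_epu8 (a, b);
+   gives m[i] == 0xff where a[i] < b[i] (unsigned) and 0x00 elsewhere.  */
+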
+/* pcom (integer, unsigned words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, unsigned double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, unsigned quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
+}
+
+/* pcom (integer, signed bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, signed words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, signed double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, signed quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
+}
+
+/* FRCZ */
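+
+/* Editorial note, not part of the original header: FRCZ extracts the
+ fractional portion of each floating point element, keeping its sign.
+ A minimal sketch, assuming _mm_set1_ps from <xmmintrin.h>:
+
+ __m128 f = _mm_frcz_ps (_mm_set1_ps (3.75f)); // every lane: 0.75f */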
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
+}
+
+#endif /* __XOP__ */
+
+#endif /* _XOPMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
index c0914d3be02..78e3115c291 100644
--- a/gcc/config/m32c/m32c.h
+++ b/gcc/config/m32c/m32c.h
@@ -560,7 +560,6 @@ typedef struct m32c_cumulative_args
#define HAVE_PRE_DECREMENT 1
#define HAVE_POST_INCREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
#define MAX_REGS_PER_ADDRESS 1
/* This is passed to the macros below, so that they can be implemented
diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h
index ee0f9f67fca..278ba15c4fa 100644
--- a/gcc/config/m68hc11/m68hc11.h
+++ b/gcc/config/m68hc11/m68hc11.h
@@ -1108,9 +1108,6 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER];
&& (GET_CODE (XEXP (operand, 0)) == POST_INC) \
&& (SP_REG_P (XEXP (XEXP (operand, 0), 0))))
-/* 1 if X is an rtx for a constant that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X))
-
/* Maximum number of registers that can appear in a valid memory address */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
index 0862936b1b4..8db98fc4f46 100644
--- a/gcc/config/m68k/m68k.c
+++ b/gcc/config/m68k/m68k.c
@@ -1399,6 +1399,30 @@ flags_in_68881 (void)
return cc_status.flags & CC_IN_68881;
}
+/* Return true if PARALLEL contains register REGNO. */
+static bool
+m68k_reg_present_p (const_rtx parallel, unsigned int regno)
+{
+ int i;
+
+ if (REG_P (parallel) && REGNO (parallel) == regno)
+ return true;
+
+ if (GET_CODE (parallel) != PARALLEL)
+ return false;
+
+ for (i = 0; i < XVECLEN (parallel, 0); ++i)
+ {
+ const_rtx x;
+
+ x = XEXP (XVECEXP (parallel, 0, i), 0);
+ if (REG_P (x) && REGNO (x) == regno)
+ return true;
+ }
+
+ return false;
+}
+
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */
static bool
@@ -1411,6 +1435,26 @@ m68k_ok_for_sibcall_p (tree decl, tree exp)
if (CALL_EXPR_STATIC_CHAIN (exp))
return false;
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example, that we aren't returning a value from the sibling in
+ a D0 register but then need to transfer it to an A0 register. */
+ rtx cfun_value;
+ rtx call_value;
+
+ cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl);
+ call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl);
+
+ /* Check that the values are equal or that the result the callee
+ function returns is a superset of what the current function returns. */
+ if (!(rtx_equal_p (cfun_value, call_value)
+ || (REG_P (cfun_value)
+ && m68k_reg_present_p (call_value, REGNO (cfun_value)))))
+ return false;
+ }
+
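+ /* Editorial illustration, not part of the original patch: if the
+ current function returns its value in %d0 but the callee would
+ return, say, a pointer in %a0 (compare m68k_libcall_value below),
+ the locations differ and the sibcall is rejected, as an extra move
+ would be needed after the callee returned. */
+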
kind = m68k_get_function_kind (current_function_decl);
if (kind == m68k_fk_normal_function)
/* We can always sibcall from a normal function, because it's
@@ -5188,6 +5232,9 @@ m68k_libcall_value (enum machine_mode mode)
return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG);
}
+/* Location in which function value is returned.
+ NOTE: Due to differences in ABIs, don't call this function directly,
+ use FUNCTION_VALUE instead. */
rtx
m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
{
diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h
index 8b00a444ce2..9d286e33b94 100644
--- a/gcc/config/mep/mep.h
+++ b/gcc/config/mep/mep.h
@@ -567,8 +567,6 @@ typedef struct
#define TRAMPOLINE_SIZE 20
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#ifdef REG_OK_STRICT
diff --git a/gcc/config/mips/iris.h b/gcc/config/mips/iris.h
index fce82174e66..373691ee6e1 100644
--- a/gcc/config/mips/iris.h
+++ b/gcc/config/mips/iris.h
@@ -63,9 +63,6 @@ along with GCC; see the file COPYING3. If not see
#undef ASM_FINISH_DECLARE_OBJECT
#define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object
-/* The linker needs a space after "-o". */
-#define SWITCHES_NEED_SPACES "o"
-
/* Specify wchar_t types. */
#undef WCHAR_TYPE
#define WCHAR_TYPE (Pmode == DImode ? "int" : "long int")
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 5005bf7f0fb..76fc37bd479 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -1930,7 +1930,7 @@
(set (match_dup 0) (match_dup 5))
(set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
- ;; Zero-extend OP4.
+ ;; Zero-extend OP0.
(set (match_dup 0)
(ashift:DI (match_dup 0)
(const_int 32)))
@@ -1938,7 +1938,7 @@
(lshiftrt:DI (match_dup 0)
(const_int 32)))
- ;; Shift OP0 into place.
+ ;; Shift OP4 into place.
(set (match_dup 4)
(ashift:DI (match_dup 4)
(const_int 32)))
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
index bdbc948ac12..c732aa07180 100644
--- a/gcc/config/mn10300/mn10300.h
+++ b/gcc/config/mn10300/mn10300.h
@@ -600,10 +600,6 @@ struct cum_arg {int nbytes; };
? gen_rtx_MEM (Pmode, arg_pointer_rtx) \
: (rtx) 0)
-/* 1 if X is an rtx for a constant that is a valid address. */
-
-#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && GET_CODE (X) != CONST_DOUBLE)
-
/* Maximum number of registers that can appear in a valid memory address. */
#define MAX_REGS_PER_ADDRESS 2
diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h
index f1b77eaf0c2..384bce4a986 100644
--- a/gcc/config/moxie/moxie.h
+++ b/gcc/config/moxie/moxie.h
@@ -475,10 +475,6 @@ enum reg_class
an immediate operand on the target machine. */
#define LEGITIMATE_CONSTANT_P(X) 1
-/* A C expression that is 1 if the RTX X is a constant which is a
- valid address. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
-
/* A number, the maximum number of registers that can appear in a
valid memory address. */
#define MAX_REGS_PER_ADDRESS 1
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
index 8997612ba5a..fe8c9e8aed3 100644
--- a/gcc/config/pdp11/pdp11.h
+++ b/gcc/config/pdp11/pdp11.h
@@ -594,10 +594,6 @@ extern int may_call_alloca;
#define MAX_REGS_PER_ADDRESS 1
-/* Recognize any constant value that is a valid address. */
-
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Nonzero if the constant value X is a legitimate general operand.
It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h
index 44559f22333..4d0c96278e0 100644
--- a/gcc/config/picochip/picochip.h
+++ b/gcc/config/picochip/picochip.h
@@ -471,8 +471,6 @@ extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Addressing Modes */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X)
-
#define MAX_REGS_PER_ADDRESS 1
/* Legitimize reload address tries machine dependent means of
diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md
index f15b586afb5..52bf7df3621 100644
--- a/gcc/config/rx/constraints.md
+++ b/gcc/config/rx/constraints.md
@@ -55,7 +55,7 @@
;; This constraint is used by the SUBSI3 pattern because the
;; RX SUB instruction can only take a 4-bit unsigned integer
-;; value.
+;; value. Also used by the MVTIPL instruction.
(define_constraint "Uint04"
"@internal An unsigned 4-bit immediate value"
(and (match_code "const_int")
diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md
index 75cf8ebaed8..d7a363ebb88 100644
--- a/gcc/config/rx/predicates.md
+++ b/gcc/config/rx/predicates.md
@@ -117,16 +117,22 @@
/* Check that the next element is the first push. */
element = XVECEXP (op, 0, 1);
if ( ! SET_P (element)
+ || ! REG_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
|| ! MEM_P (SET_DEST (element))
- || ! REG_P (XEXP (SET_DEST (element), 0))
- || REGNO (XEXP (SET_DEST (element), 0)) != SP_REG
- || ! REG_P (SET_SRC (element)))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
+ || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
+ != GET_MODE_SIZE (SImode))
return false;
src_regno = REGNO (SET_SRC (element));
/* Check that the remaining elements use SP-<disp>
- addressing and incremental register numbers. */
+ addressing and decreasing register numbers. */
for (i = 2; i < count; i++)
{
element = XVECEXP (op, 0, i);
@@ -134,7 +140,7 @@
if ( ! SET_P (element)
|| ! REG_P (SET_SRC (element))
|| GET_MODE (SET_SRC (element)) != SImode
- || REGNO (SET_SRC (element)) != src_regno + (i - 1)
+ || REGNO (SET_SRC (element)) != src_regno - (i - 1)
|| ! MEM_P (SET_DEST (element))
|| GET_MODE (SET_DEST (element)) != SImode
|| GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
@@ -142,7 +148,7 @@
|| REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
|| ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
|| INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
- != (i - 1) * GET_MODE_SIZE (SImode))
+ != i * GET_MODE_SIZE (SImode))
return false;
}
return true;
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index cf2b098e83c..885f52581de 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -51,6 +51,8 @@
#include "target-def.h"
#include "langhooks.h"
+enum rx_cpu_types rx_cpu_type = RX600;
+
/* Return true if OP is a reference to an object in a small data area. */
static bool
@@ -249,7 +251,6 @@ rx_is_mode_dependent_addr (rtx addr)
}
}
-
/* A C compound statement to output to stdio stream FILE the
assembler syntax for an instruction operand that is a memory
reference whose address is ADDR. */
@@ -445,8 +446,13 @@ rx_print_operand (FILE * file, rtx op, int letter)
fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
else if (CONST_INT_P (op))
{
+ HOST_WIDE_INT v = INTVAL (op);
+
fprintf (file, "#");
- rx_print_integer (file, INTVAL (op) >> 32);
+ /* Trickery to avoid problems with shifting 32 bits at a time:
+ a shift by 32 or more is undefined when HOST_WIDE_INT is only
+ 32 bits wide, so shift twice by 16 instead. */
+ v = v >> 16;
+ v = v >> 16;
+ rx_print_integer (file, v);
}
else
{
@@ -840,22 +846,20 @@ has_func_attr (const_tree decl, const char * func_attr)
return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE;
}
-/* Returns true if the provided function has
- the "[fast_]interrupt" attribute. */
+/* Returns true if the provided function has the "fast_interrupt" attribute. */
static inline bool
is_fast_interrupt_func (const_tree decl)
{
- return has_func_attr (decl, "interrupt")
- || has_func_attr (decl, "fast_interrupt") ;
+ return has_func_attr (decl, "fast_interrupt");
}
-/* Returns true if the provided function has the "exception" attribute. */
+/* Returns true if the provided function has the "interrupt" attribute. */
static inline bool
-is_exception_func (const_tree decl)
+is_interrupt_func (const_tree decl)
{
- return has_func_attr (decl, "exception");
+ return has_func_attr (decl, "interrupt");
}
/* Returns true if the provided function has the "naked" attribute. */
@@ -945,8 +949,8 @@ rx_set_current_function (tree fndecl)
{
/* Remember the last target of rx_set_current_function. */
static tree rx_previous_fndecl;
- bool prev_was_interrupt;
- bool current_is_interrupt;
+ bool prev_was_fast_interrupt;
+ bool current_is_fast_interrupt;
/* Only change the context if the function changes. This hook is called
several times in the course of compiling a function, and we don't want
@@ -954,18 +958,19 @@ rx_set_current_function (tree fndecl)
if (fndecl == rx_previous_fndecl)
return;
- prev_was_interrupt
+ prev_was_fast_interrupt
= rx_previous_fndecl
? is_fast_interrupt_func (rx_previous_fndecl) : false;
- current_is_interrupt
+
+ current_is_fast_interrupt
= fndecl ? is_fast_interrupt_func (fndecl) : false;
- if (prev_was_interrupt != current_is_interrupt)
+ if (prev_was_fast_interrupt != current_is_fast_interrupt)
{
- use_fixed_regs = current_is_interrupt;
+ use_fixed_regs = current_is_fast_interrupt;
target_reinit ();
}
-
+
rx_previous_fndecl = fndecl;
}
@@ -1057,8 +1062,8 @@ rx_get_stack_layout (unsigned int * lowest,
if (df_regs_ever_live_p (reg)
&& (! call_used_regs[reg]
/* Even call clobbered registers must
- be pushed inside exception handlers. */
- || is_exception_func (NULL_TREE)))
+ be pushed inside interrupt handlers. */
+ || is_interrupt_func (NULL_TREE)))
{
if (low == 0)
low = reg;
@@ -1142,9 +1147,8 @@ rx_emit_stack_pushm (rtx * operands)
gcc_assert (REG_P (first_push));
asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n",
- reg_names [REGNO (first_push)],
- reg_names [REGNO (first_push) + last_reg]);
-
+ reg_names [REGNO (first_push) - last_reg],
+ reg_names [REGNO (first_push)]);
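+ /* Editorial note, not part of the original patch: first_push now
+ names the highest register in the PARALLEL, so the emitted range
+ reads low-high, e.g. "pushm r6-r10". */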
}
/* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */
@@ -1167,14 +1171,30 @@ gen_rx_store_vector (unsigned int low, unsigned int high)
XVECEXP (vector, 0, i + 1) =
gen_rtx_SET (SImode,
gen_rtx_MEM (SImode,
- i == 0 ? stack_pointer_rtx
- : gen_rtx_MINUS (SImode, stack_pointer_rtx,
- GEN_INT (i * UNITS_PER_WORD))),
- gen_rtx_REG (SImode, low + i));
-
+ gen_rtx_MINUS (SImode, stack_pointer_rtx,
+ GEN_INT ((i + 1) * UNITS_PER_WORD))),
+ gen_rtx_REG (SImode, high - i));
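+ /* Editorial note, not part of the original patch: vector element
+ i + 1 stores register high - i at SP - (i + 1) * 4, giving
+ decreasing register numbers at increasing displacements, as
+ rx_store_multiple_vector now expects. */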
return vector;
}
+/* Mark INSN as being frame related. If it is a PARALLEL
+ then mark each element as being frame related as well. */
+
+static void
+mark_frame_related (rtx insn)
+{
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = PATTERN (insn);
+
+ if (GET_CODE (insn) == PARALLEL)
+ {
+ unsigned int i;
+
+ for (i = 0; i < XVECLEN (insn, 0); i++)
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1;
+ }
+}
+
void
rx_expand_prologue (void)
{
@@ -1183,6 +1203,7 @@ rx_expand_prologue (void)
unsigned int mask;
unsigned int low;
unsigned int high;
+ unsigned int reg;
rtx insn;
/* Naked functions use their own, programmer provided prologues. */
@@ -1196,14 +1217,12 @@ rx_expand_prologue (void)
/* If we use any of the callee-saved registers, save them now. */
if (mask)
{
- unsigned int reg;
-
/* Push registers in reverse order. */
for (reg = FIRST_PSEUDO_REGISTER; reg --;)
if (mask & (1 << reg))
{
insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg)));
- RTX_FRAME_RELATED_P (insn) = 1;
+ mark_frame_related (insn);
}
}
else if (low)
@@ -1214,7 +1233,57 @@ rx_expand_prologue (void)
insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1)
* UNITS_PER_WORD),
gen_rx_store_vector (low, high)));
- RTX_FRAME_RELATED_P (insn) = 1;
+ mark_frame_related (insn);
+ }
+
+ if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER)
+ {
+ unsigned int acc_high, acc_low;
+
+ /* Interrupt handlers have to preserve the accumulator
+ register if so requested by the user. Use the first
+ two pushed registers as intermediaries. */
+ if (mask)
+ {
+ acc_low = acc_high = 0;
+
+ for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++)
+ if (mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high != 0);
+
+ /* Note - the bottom 16 bits of the accumulator are inaccessible:
+ MVFACMI reads the middle 32 bits and MVFACHI the top 32 bits.
+ We just assume that the bottom 16 bits are zero. */
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high <= high);
+
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_store_vector (acc_low, acc_high)));
+ }
+
+ frame_size += 2 * UNITS_PER_WORD;
}
/* If needed, set up the frame pointer. */
@@ -1270,8 +1339,8 @@ rx_output_function_prologue (FILE * file,
if (is_fast_interrupt_func (NULL_TREE))
asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n");
- if (is_exception_func (NULL_TREE))
- asm_fprintf (file, "\t; Note: Exception Handler\n");
+ if (is_interrupt_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Interrupt Handler\n");
if (is_naked_func (NULL_TREE))
asm_fprintf (file, "\t; Note: Naked Function\n");
@@ -1382,6 +1451,7 @@ rx_expand_epilogue (bool is_sibcall)
unsigned int stack_size;
unsigned int register_mask;
unsigned int regs_size;
+ unsigned int reg;
unsigned HOST_WIDE_INT total_size;
if (is_naked_func (NULL_TREE))
@@ -1407,14 +1477,14 @@ rx_expand_epilogue (bool is_sibcall)
their caller. Instead they branch to their sibling and allow their
return instruction to return to this function's parent.
- - Fast interrupt and exception handling functions have to use special
+ - Fast and normal interrupt handling functions have to use special
return instructions.
- Functions where we have pushed a fragmented set of registers into the
call-save area must have the same set of registers popped. */
if (is_sibcall
|| is_fast_interrupt_func (NULL_TREE)
- || is_exception_func (NULL_TREE)
+ || is_interrupt_func (NULL_TREE)
|| register_mask)
{
/* Cannot use the special instructions - deconstruct by hand. */
@@ -1422,10 +1492,47 @@ rx_expand_epilogue (bool is_sibcall)
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (total_size)));
- if (register_mask)
+ if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER)
{
- unsigned int reg;
+ unsigned int acc_low, acc_high;
+
+ /* Reverse the saving of the accumulator register onto the stack.
+ Note that we must adjust the saved "low" accumulator value as it
+ is really the middle 32 bits of the accumulator. */
+ if (register_mask)
+ {
+ acc_low = acc_high = 0;
+ for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++)
+ if (register_mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+ emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_popm_vector (acc_low, acc_high)));
+ }
+
+ emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low),
+ gen_rtx_REG (SImode, acc_low),
+ GEN_INT (16)));
+ emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high)));
+ }
+ if (register_mask)
+ {
for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg ++)
if (register_mask & (1 << reg))
emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg)));
@@ -1441,7 +1548,7 @@ rx_expand_epilogue (bool is_sibcall)
if (is_fast_interrupt_func (NULL_TREE))
emit_jump_insn (gen_fast_interrupt_return ());
- else if (is_exception_func (NULL_TREE))
+ else if (is_interrupt_func (NULL_TREE))
emit_jump_insn (gen_exception_return ());
else if (! is_sibcall)
emit_jump_insn (gen_simple_return ());
@@ -1670,6 +1777,7 @@ enum rx_builtin
RX_BUILTIN_MVTACHI,
RX_BUILTIN_MVTACLO,
RX_BUILTIN_MVTC,
+ RX_BUILTIN_MVTIPL,
RX_BUILTIN_RACW,
RX_BUILTIN_REVW,
RX_BUILTIN_RMPA,
@@ -1725,6 +1833,7 @@ rx_init_builtins (void)
ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void);
ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer);
ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer);
+ ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer);
ADD_RX_BUILTIN1 (RACW, "racw", void, integer);
ADD_RX_BUILTIN1 (ROUND, "round", intSI, float);
ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI);
@@ -1733,20 +1842,6 @@ rx_init_builtins (void)
}
static rtx
-rx_expand_builtin_stz (rtx arg, rtx target, rtx (* gen_func)(rtx, rtx))
-{
- if (! CONST_INT_P (arg))
- return NULL_RTX;
-
- if (target == NULL_RTX || ! REG_P (target))
- target = gen_reg_rtx (SImode);
-
- emit_insn (gen_func (target, arg));
-
- return target;
-}
-
-static rtx
rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg)
{
if (reg && ! REG_P (arg))
@@ -1791,6 +1886,21 @@ rx_expand_builtin_mvfc (tree t_arg, rtx target)
}
static rtx
+rx_expand_builtin_mvtipl (rtx arg)
+{
+ /* The RX610 does not support the MVTIPL instruction. */
+ if (rx_cpu_type == RX610)
+ return NULL_RTX;
+
+ if (! CONST_INT_P (arg) || ! IN_RANGE (INTVAL (arg), 0, (1 << 4) - 1))
+ return NULL_RTX;
+
+ emit_insn (gen_mvtipl (arg));
+
+ return NULL_RTX;
+}
+
+static rtx
rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx))
{
rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
@@ -1887,6 +1997,7 @@ rx_expand_builtin (tree exp,
case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX;
case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target);
case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp);
+ case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op);
case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg
(op, gen_racw, false);
case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target);
@@ -1945,7 +2056,7 @@ rx_elf_asm_destructor (rtx symbol, int priority)
rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false);
}
-/* Check "interrupt", "exception" and "naked" attributes. */
+/* Check "fast_interrupt", "interrupt" and "naked" attributes. */
static tree
rx_handle_func_attribute (tree * node,
@@ -1975,9 +2086,8 @@ rx_handle_func_attribute (tree * node,
const struct attribute_spec rx_attribute_table[] =
{
/* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */
- { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
{ "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
- { "exception", 0, 0, true, false, false, rx_handle_func_attribute },
+ { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
{ "naked", 0, 0, true, false, false, rx_handle_func_attribute },
{ NULL, 0, 0, false, false, false, NULL }
};
@@ -1993,7 +2103,7 @@ static bool
rx_func_attr_inlinable (const_tree decl)
{
return ! is_fast_interrupt_func (decl)
- && ! is_exception_func (decl)
+ && ! is_interrupt_func (decl)
&& ! is_naked_func (decl);
}
@@ -2115,6 +2225,20 @@ rx_is_legitimate_constant (rtx x)
( 1 << (rx_max_constant_size * 8)));
}
+/* This is a tri-state variable. The default value of 0 means that the user
+ has specified neither -mfpu nor -mnofpu on the command line. In this case
+ the selection of RX FPU instructions is entirely based upon the size of
+ the floating point object and whether unsafe math optimizations were
+ enabled. If 32-bit doubles have been enabled then both floats and doubles
+ can make use of FPU instructions, otherwise only floats may do so.
+
+ If the value is 1 then the user has specified -mfpu and the FPU
+ instructions should be used. Unsafe math optimizations will automatically
+ be enabled and doubles set to 32-bits. If the value is -1 then -mnofpu
+ has been specified and FPU instructions will not be used, even if unsafe
+ math optimizations have been enabled. */
+int rx_enable_fpu = 0;
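+
+/* Editorial illustration, not part of the original patch: -mfpu sets
+ rx_enable_fpu to 1, so OVERRIDE_OPTIONS turns on fast math and FPU
+ instructions are used; -mnofpu sets it to -1 and ALLOW_RX_FPU_INSNS
+ is then false even under -funsafe-math-optimizations. */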
+
/* Extra processing for target specific command line options. */
static bool
@@ -2122,6 +2246,27 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value)
{
switch (code)
{
+ /* -mfpu enables the use of RX FPU instructions. This implies the
+ use of 32-bit doubles and also the enabling of fast math
+ optimizations, since the RX FPU instructions are not IEEE
+ compliant. The -mnofpu option disables the use of RX FPU
+ instructions, but does not place any constraints on the size of
+ doubles or the use of fast math optimizations.
+
+ The selection of 32-bit vs 64-bit doubles is handled by the
+ setting of the 32BIT_DOUBLES mask in the rx.opt file. Fast math
+ optimizations are enabled in OVERRIDE_OPTIONS because if they were
+ enabled here the setting could be overridden by a -fno-fast-math
+ option specified *earlier* on the command line. (Target specific
+ options are processed before generic ones.) */
+ case OPT_fpu:
+ rx_enable_fpu = 1;
+ break;
+
+ case OPT_nofpu:
+ rx_enable_fpu = -1;
+ break;
+
case OPT_mint_register_:
switch (value)
{
@@ -2145,12 +2290,21 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value)
break;
case OPT_mmax_constant_size_:
- /* Make sure that the the -mmax-constant_size option is in range. */
+ /* Make sure that the -mmax-constant_size option is in range. */
return IN_RANGE (value, 0, 4);
+ case OPT_mcpu_:
+ case OPT_patch_:
+ if (strcasecmp (arg, "RX610") == 0)
+ rx_cpu_type = RX610;
+ /* FIXME: Should we check for non-RX cpu names here ? */
+ break;
+
default:
- return true;
+ break;
}
+
+ return true;
}
static int
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
index a01e194910b..bb7cf7f1e3e 100644
--- a/gcc/config/rx/rx.h
+++ b/gcc/config/rx/rx.h
@@ -24,18 +24,24 @@
{ \
builtin_define ("__RX__"); \
builtin_assert ("cpu=RX"); \
- builtin_assert ("machine=RX"); \
+ if (rx_cpu_type == RX610) \
+ builtin_assert ("machine=RX610"); \
+ else \
+ builtin_assert ("machine=RX600"); \
\
if (TARGET_BIG_ENDIAN_DATA) \
builtin_define ("__RX_BIG_ENDIAN__"); \
else \
builtin_define ("__RX_LITTLE_ENDIAN__");\
\
- if (TARGET_64BIT_DOUBLES) \
- builtin_define ("__RX_64BIT_DOUBLES__");\
- else \
+ if (TARGET_32BIT_DOUBLES) \
builtin_define ("__RX_32BIT_DOUBLES__");\
+ else \
+ builtin_define ("__RX_64BIT_DOUBLES__");\
\
+ if (ALLOW_RX_FPU_INSNS) \
+ builtin_define ("__RX_FPU_INSNS__"); \
+ \
if (TARGET_AS100_SYNTAX) \
builtin_define ("__RX_AS100_SYNTAX__"); \
else \
@@ -43,6 +49,17 @@
} \
while (0)
+enum rx_cpu_types
+{
+ RX600,
+ RX610
+};
+
+extern enum rx_cpu_types rx_cpu_type;
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}"
+
#undef STARTFILE_SPEC
#define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s"
@@ -52,7 +69,8 @@
#undef ASM_SPEC
#define ASM_SPEC "\
%{mbig-endian-data:-mbig-endian-data} \
-%{m64bit-doubles:-m64bit-doubles} \
+%{m32bit-doubles:-m32bit-doubles} \
+%{!m32bit-doubles:-m64bit-doubles} \
%{msmall-data-limit*:-msmall-data-limit} \
%{mrelax:-relax} \
"
@@ -88,16 +106,17 @@
#define LONG_LONG_TYPE_SIZE 64
#define FLOAT_TYPE_SIZE 32
-#define DOUBLE_TYPE_SIZE (TARGET_64BIT_DOUBLES ? 64 : 32)
+#define DOUBLE_TYPE_SIZE (TARGET_32BIT_DOUBLES ? 32 : 64)
#define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE
-#ifdef __RX_64BIT_DOUBLES__
-#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
-#define LIBGCC2_DOUBLE_TYPE_SIZE 64
-#define LIBGCC2_HAS_DF_MODE 1
-#else
+#ifdef __RX_32BIT_DOUBLES__
+#define LIBGCC2_HAS_DF_MODE 0
#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32
#define LIBGCC2_DOUBLE_TYPE_SIZE 32
+#else
+#define LIBGCC2_HAS_DF_MODE 1
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#define LIBGCC2_DOUBLE_TYPE_SIZE 64
#endif
#define DEFAULT_SIGNED_CHAR 0
@@ -591,7 +610,6 @@ typedef unsigned int CUMULATIVE_ARGS;
#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
rx_print_operand_address (FILE, ADDR)
-
#define CC_NO_CARRY 0400
#define NOTICE_UPDATE_CC(EXP, INSN) rx_notice_update_cc (EXP, INSN)
@@ -614,19 +632,28 @@ extern int rx_float_compare_mode;
#define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \
? DBX_DEBUG : DWARF2_DEBUG)
-#undef CC1_SPEC
-#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}"
+#define INCOMING_FRAME_SP_OFFSET 4
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 4
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4
+
+extern int rx_enable_fpu;
/* For some unknown reason LTO compression is not working, at
least on my local system. So set the default compression
- level to none, for now. */
+ level to none, for now.
+
+ For an explanation of rx_enable_fpu see rx_handle_option(). */
#define OVERRIDE_OPTIONS \
do \
{ \
if (flag_lto_compression_level == -1) \
flag_lto_compression_level = 0; \
+ \
+ if (rx_enable_fpu == 1) \
+ set_fast_math_flags (true); \
} \
while (0)
/* This macro is used to decide when RX FPU instructions can be used. */
-#define ALLOW_RX_FPU_INSNS flag_unsafe_math_optimizations
+#define ALLOW_RX_FPU_INSNS ((rx_enable_fpu != -1) \
+ && flag_unsafe_math_optimizations)
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 165da4f41a1..360f6235558 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -27,8 +27,8 @@
;; This code iterator is used for sign- and zero- extensions.
(define_mode_iterator small_int_modes [(HI "") (QI "")])
-;; We do not handle DFmode here because by default it is
-;; the same as SFmode, and if -m64bit-doubles is active
+;; We do not handle DFmode here because it is either
+;; the same as SFmode, or if -m64bit-doubles is active
;; then all operations on doubles have to be handled by
;; library functions.
(define_mode_iterator register_modes
@@ -75,15 +75,14 @@
(UNSPEC_BUILTIN_MVTACHI 41)
(UNSPEC_BUILTIN_MVTACLO 42)
(UNSPEC_BUILTIN_MVTC 43)
- (UNSPEC_BUILTIN_MVTCP 44)
- (UNSPEC_BUILTIN_OPEPC 45)
- (UNSPEC_BUILTIN_RACW 46)
- (UNSPEC_BUILTIN_REVW 47)
- (UNSPEC_BUILTIN_RMPA 48)
- (UNSPEC_BUILTIN_ROUND 49)
- (UNSPEC_BUILTIN_SAT 50)
- (UNSPEC_BUILTIN_SETPSW 51)
- (UNSPEC_BUILTIN_WAIT 52)
+ (UNSPEC_BUILTIN_MVTIPL 44)
+ (UNSPEC_BUILTIN_RACW 45)
+ (UNSPEC_BUILTIN_REVW 46)
+ (UNSPEC_BUILTIN_RMPA 47)
+ (UNSPEC_BUILTIN_ROUND 48)
+ (UNSPEC_BUILTIN_SAT 49)
+ (UNSPEC_BUILTIN_SETPSW 50)
+ (UNSPEC_BUILTIN_WAIT 51)
]
)
@@ -1002,10 +1001,8 @@
(set_attr "timings" "11,11,11,11,11,33")
(set_attr "length" "3,4,5,6,7,6")]
)
-
+
;; Floating Point Instructions
-;; These patterns are only enabled with -ffast-math because the RX FPU
-;; cannot handle sub-normal values.
(define_insn "addsf3"
[(set (match_operand:SF 0 "register_operand" "=r,r,r")
@@ -1298,7 +1295,6 @@
[(set_attr "length" "3,6")
(set_attr "timings" "22")]
)
-
;; Block move functions.
@@ -1580,8 +1576,8 @@
;; Move to Accumulator (high)
(define_insn "mvtachi"
- [(unspec:SI [(match_operand:SI 0 "register_operand" "r")]
- UNSPEC_BUILTIN_MVTACHI)]
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACHI)]
""
"mvtachi\t%0"
[(set_attr "length" "3")]
@@ -1589,8 +1585,8 @@
;; Move to Accumulator (low)
(define_insn "mvtaclo"
- [(unspec:SI [(match_operand:SI 0 "register_operand" "r")]
- UNSPEC_BUILTIN_MVTACLO)]
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACLO)]
""
"mvtaclo\t%0"
[(set_attr "length" "3")]
@@ -1598,8 +1594,8 @@
;; Round Accumulator
(define_insn "racw"
- [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")]
- UNSPEC_BUILTIN_RACW)]
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_RACW)]
""
"racw\t%0"
[(set_attr "length" "3")]
@@ -1679,7 +1675,7 @@
;; Move from control register
(define_insn "mvfc"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:SI 1 "immediate_operand" "i")]
UNSPEC_BUILTIN_MVFC))]
""
@@ -1691,13 +1687,24 @@
(define_insn "mvtc"
[(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i")
(match_operand:SI 1 "nonmemory_operand" "r,i")]
- UNSPEC_BUILTIN_MVTC)
- (clobber (cc0))]
+ UNSPEC_BUILTIN_MVTC)]
""
"mvtc\t%1, %C0"
- [(set_attr "length" "3,7")
- (set_attr "cc" "clobber")] ;; Just in case the control
- ;; register selected is the psw.
+ [(set_attr "length" "3,7")]
+ ;; Ignore possible clobbering of the comparison flags in the
+ ;; PSW register. This is a cc0 target so any cc0 setting
+ ;; instruction will always be paired with a cc0 user, without
+ ;; the possibility of this instruction being placed in between
+ ;; them.
+)
+
+;; Move to interrupt priority level
+(define_insn "mvtipl"
+ [(unspec:SI [(match_operand:SI 0 "immediate_operand" "Uint04")]
+ UNSPEC_BUILTIN_MVTIPL)]
+ ""
+ "mvtipl\t%0"
+ [(set_attr "length" "3")]
)
;;---------- Interrupts ------------------------
@@ -1748,27 +1755,6 @@
[(set_attr "length" "5")]
)
-;; Move to co-processor register
-(define_insn "mvtcp"
- [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i")
- (match_operand:SI 1 "nonmemory_operand" "i,r")
- (match_operand:SI 2 "immediate_operand" "i,i")]
- UNSPEC_BUILTIN_MVTCP)]
- ""
- "; mvtcp\t%0, %1, %2"
- [(set_attr "length" "7,5")]
-)
-
-;; Co-processor operation
-(define_insn "opecp"
- [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")
- (match_operand:SI 1 "immediate_operand" "i")]
- UNSPEC_BUILTIN_OPEPC)]
- ""
- "; opecp\t%0, %1"
- [(set_attr "length" "5")]
-)
-
;;---------- Misc ------------------------
;; Required by cfglayout.c...
diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt
index 83e75bfba76..768d565b478 100644
--- a/gcc/config/rx/rx.opt
+++ b/gcc/config/rx/rx.opt
@@ -19,13 +19,31 @@
; <http://www.gnu.org/licenses/>.
;---------------------------------------------------
+m32bit-doubles
+Target RejectNegative Mask(32BIT_DOUBLES)
+Store doubles in 32 bits.
+
m64bit-doubles
-Target RejectNegative Mask(64BIT_DOUBLES)
-Store doubles in 64 bits.
+Target RejectNegative InverseMask(32BIT_DOUBLES)
+Store doubles in 64 bits. This is the default.
-m32bit-doubles
-Target RejectNegative InverseMask(64BIT_DOUBLES)
-Stores doubles in 32 bits. This is the default.
+fpu
+Target RejectNegative Mask(32BIT_DOUBLES) MaskExists
+Enable the use of RX FPU instructions.
+
+nofpu
+Target RejectNegative InverseMask(32BIT_DOUBLES) MaskExists
+Disable the use of RX FPU instructions.
+
+;---------------------------------------------------
+
+mcpu=
+Target RejectNegative Joined Var(rx_cpu_name)
+Specify the target RX cpu type.
+
+patch=
+Target RejectNegative Joined Var(rx_cpu_name)
+Alias for -mcpu.
;---------------------------------------------------
@@ -72,3 +90,9 @@ Maximum size in bytes of constant values allowed as operands.
mint-register=
Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0)
Specifies the number of registers to reserve for interrupt handlers.
+
+;---------------------------------------------------
+
+msave-acc-in-interrupts
+Target Mask(SAVE_ACC_REGISTER)
+Specifies whether interrupt functions should save and restore the accumulator register.
diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx
index 39cda72af57..eb1ca48d3a3 100644
--- a/gcc/config/rx/t-rx
+++ b/gcc/config/rx/t-rx
@@ -20,9 +20,9 @@
# Enable multilibs:
-MULTILIB_OPTIONS = m64bit-doubles mbig-endian-data
-MULTILIB_DIRNAMES = 64fp big-endian-data
-MULTILIB_MATCHES = m64bit-doubles=mieee
+MULTILIB_OPTIONS = m32bit-doubles mbig-endian-data
+MULTILIB_DIRNAMES = 32fp big-endian-data
+MULTILIB_MATCHES = m32bit-doubles=fpu
MULTILIB_EXCEPTIONS =
MULTILIB_EXTRA_OPTS =
diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h
index 0b7af7b2739..cde9c222546 100644
--- a/gcc/config/score/score.h
+++ b/gcc/config/score/score.h
@@ -688,9 +688,6 @@ typedef struct score_args
#define HAVE_PRE_MODIFY_REG 0
#define HAVE_POST_MODIFY_REG 0
-/* Recognize any constant value that is a valid address. */
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
/* Maximum number of registers that can appear in a valid memory address. */
#define MAX_REGS_PER_ADDRESS 1
diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h
index 682f7e6f466..fa97e8becdc 100644
--- a/gcc/config/stormy16/stormy16.h
+++ b/gcc/config/stormy16/stormy16.h
@@ -522,8 +522,6 @@ enum reg_class
#define HAVE_PRE_DECREMENT 1
-#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X)
-
#define MAX_REGS_PER_ADDRESS 1
#ifdef REG_OK_STRICT
diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h
index 1087069adbb..dccbe9cc8ee 100644
--- a/gcc/config/vax/linux.h
+++ b/gcc/config/vax/linux.h
@@ -21,17 +21,7 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_VERSION
#define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)");
-#define TARGET_OS_CPP_BUILTINS() \
- do \
- { \
- LINUX_TARGET_OS_CPP_BUILTINS(); \
- if (flag_pic) \
- { \
- builtin_define ("__PIC__"); \
- builtin_define ("__pic__"); \
- } \
- } \
- while (0)
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
/* We use GAS, G-float double and want new DI patterns. */
#undef TARGET_DEFAULT