| author    | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-11-06 22:51:05 +0000 |
|-----------|----------------------------------------------------------|---------------------------|
| committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-11-06 22:51:05 +0000 |
| commit    | e1647522f93999450cc558341bb2066ca26e070f (patch)         |                           |
| tree      | ec9704394836b7bb5123d7d8c1d9647eace77c5d /gcc/config     |                           |
| parent    | 035ef3e66f39f67a3fab95825e0fbc750bc8160d (diff)          |                           |
| download  | gcc-e1647522f93999450cc558341bb2066ca26e070f.tar.gz      |                           |
2009-11-06 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 153975
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@153981 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
51 files changed, 4036 insertions, 542 deletions
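The bulk of the ARM changes below replace the old `fputype` enum pair (`arm_fpu_arch`/`arm_fpu_tune`) with a single table of `arm_fpu_desc` records that `arm_override_options` looks up by the `-mfpu=` name. A minimal, self-contained sketch of that lookup pattern — with simplified field types and a hypothetical `find_fpu` helper; the real table and loop are in the `gcc/config/arm/arm.c` hunks below:

```c
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for GCC's arm_fpu_desc; the real struct (see the
   gcc/config/arm/arm.h hunk below) uses enum arm_fp_model and enum
   vfp_reg_type for some of these fields.  */
struct arm_fpu_desc
{
  const char *name;   /* -mfpu= argument */
  int rev;            /* architecture revision, e.g. VFPv3 -> 3 */
  int d32;            /* 1 if the full 32-entry D register bank */
  int neon;           /* 1 if NEON is available */
  int fp16;           /* 1 if half-precision extensions are available */
};

static const struct arm_fpu_desc all_fpus[] =
{
  {"vfp",       2, 0, 0, 0},
  {"vfpv3",     3, 1, 0, 0},
  {"vfpv3-d16", 3, 0, 0, 0},
  {"neon",      3, 1, 1, 0},
  {"neon-fp16", 3, 1, 1, 1},
  {"vfp3",      3, 1, 0, 0},  /* compatibility alias for vfpv3 */
};

/* Hypothetical helper mirroring the lookup loop the patch adds to
   arm_override_options: scan the table for the -mfpu= name.  */
static const struct arm_fpu_desc *
find_fpu (const char *name)
{
  size_t i;
  for (i = 0; i < sizeof all_fpus / sizeof all_fpus[0]; i++)
    if (strcmp (all_fpus[i].name, name) == 0)
      return &all_fpus[i];
  return NULL;  /* the real code then errors: "invalid floating point option" */
}

int
main (void)
{
  const struct arm_fpu_desc *d = find_fpu ("neon-fp16");
  if (d)
    printf ("%s: VFP rev %d, %s, neon=%d, fp16=%d\n",
            d->name, d->rev, d->d32 ? "D32" : "D16", d->neon, d->fp16);
  return 0;
}
```

With the descriptor in hand, predicates such as `TARGET_VFP3` or `TARGET_NEON` become simple field tests on `arm_fpu_desc` instead of enumerations of `FPUTYPE_*` cases, which is why the `arm.h` hunks below shrink so much.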
diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h
index 2b5165c0754..81c12aa14fc 100644
--- a/gcc/config/alpha/osf.h
+++ b/gcc/config/alpha/osf.h
@@ -167,10 +167,6 @@ __enable_execute_stack (void *addr) \
 #define LD_INIT_SWITCH "-init"
 #define LD_FINI_SWITCH "-fini"
 
-/* The linker needs a space after "-o".  This allows -oldstyle_liblookup to
-   be passed to ld.  */
-#define SWITCHES_NEED_SPACES "o"
-
 /* Select a format to encode pointers in exception handling data.  CODE
    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
    true if the symbol may be affected by dynamic relocations.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 25e433cd3a4..4c7fcb65854 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -524,14 +524,11 @@ enum processor_type arm_tune = arm_none;
 /* The default processor used if not overridden by commandline.  */
 static enum processor_type arm_default_cpu = arm_none;
 
-/* Which floating point model to use.  */
-enum arm_fp_model arm_fp_model;
-
-/* Which floating point hardware is available.  */
-enum fputype arm_fpu_arch;
-
 /* Which floating point hardware to schedule for.  */
-enum fputype arm_fpu_tune;
+int arm_fpu_attr;
+
+/* Which floating point hardware to use.  */
+const struct arm_fpu_desc *arm_fpu_desc;
 
 /* Whether to use floating point hardware.  */
 enum float_abi_type arm_float_abi;
@@ -809,46 +806,21 @@ static struct arm_cpu_select arm_select[] =
 
 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
 
-struct fpu_desc
-{
-  const char * name;
-  enum fputype fpu;
-};
-
-
 /* Available values for -mfpu=.  */
-static const struct fpu_desc all_fpus[] =
-{
-  {"fpa",       FPUTYPE_FPA},
-  {"fpe2",      FPUTYPE_FPA_EMU2},
-  {"fpe3",      FPUTYPE_FPA_EMU2},
-  {"maverick",  FPUTYPE_MAVERICK},
-  {"vfp",       FPUTYPE_VFP},
-  {"vfp3",      FPUTYPE_VFP3},
-  {"vfpv3",     FPUTYPE_VFP3},
-  {"vfpv3-d16", FPUTYPE_VFP3D16},
-  {"neon",      FPUTYPE_NEON},
-  {"neon-fp16", FPUTYPE_NEON_FP16}
-};
-
-
-/* Floating point models used by the different hardware.
-   See fputype in arm.h.  */
-
-static const enum arm_fp_model fp_model_for_fpu[] =
-{
-  /* No FP hardware.  */
-  ARM_FP_MODEL_UNKNOWN,   /* FPUTYPE_NONE  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU2  */
-  ARM_FP_MODEL_FPA,       /* FPUTYPE_FPA_EMU3  */
-  ARM_FP_MODEL_MAVERICK,  /* FPUTYPE_MAVERICK  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3D16  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_VFP3  */
-  ARM_FP_MODEL_VFP,       /* FPUTYPE_NEON  */
-  ARM_FP_MODEL_VFP        /* FPUTYPE_NEON_FP16  */
+static const struct arm_fpu_desc all_fpus[] =
+{
+  {"fpa",       ARM_FP_MODEL_FPA, 0, 0, false, false},
+  {"fpe2",      ARM_FP_MODEL_FPA, 2, 0, false, false},
+  {"fpe3",      ARM_FP_MODEL_FPA, 3, 0, false, false},
+  {"maverick",  ARM_FP_MODEL_MAVERICK, 0, 0, false, false},
+  {"vfp",       ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+  {"vfpv3",     ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+  {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+  {"neon",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+  {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+  /* Compatibility aliases.  */
+  {"vfp3",      ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
 };
@@ -1615,7 +1587,6 @@ arm_override_options (void)
   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
     error ("iwmmxt abi requires an iwmmxt capable cpu");
 
-  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
   if (target_fpu_name == NULL && target_fpe_name != NULL)
     {
       if (streq (target_fpe_name, "2"))
@@ -1626,46 +1597,52 @@ arm_override_options (void)
         error ("invalid floating point emulation option: -mfpe=%s",
                target_fpe_name);
     }
 
-  if (target_fpu_name != NULL)
-    {
-      /* The user specified a FPU.  */
-      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
-        {
-          if (streq (all_fpus[i].name, target_fpu_name))
-            {
-              arm_fpu_arch = all_fpus[i].fpu;
-              arm_fpu_tune = arm_fpu_arch;
-              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
-              break;
-            }
-        }
-      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
-        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
-    }
-  else
+
+  if (target_fpu_name == NULL)
     {
 #ifdef FPUTYPE_DEFAULT
-      /* Use the default if it is specified for this platform.  */
-      arm_fpu_arch = FPUTYPE_DEFAULT;
-      arm_fpu_tune = FPUTYPE_DEFAULT;
+      target_fpu_name = FPUTYPE_DEFAULT;
 #else
-      /* Pick one based on CPU type.  */
-      /* ??? Some targets assume FPA is the default.
-         if ((insn_flags & FL_VFP) != 0)
-           arm_fpu_arch = FPUTYPE_VFP;
-         else
-      */
       if (arm_arch_cirrus)
-        arm_fpu_arch = FPUTYPE_MAVERICK;
+        target_fpu_name = "maverick";
       else
-        arm_fpu_arch = FPUTYPE_FPA_EMU2;
+        target_fpu_name = "fpe2";
 #endif
-      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
-        arm_fpu_tune = FPUTYPE_FPA;
+    }
+
+  arm_fpu_desc = NULL;
+  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+    {
+      if (streq (all_fpus[i].name, target_fpu_name))
+        {
+          arm_fpu_desc = &all_fpus[i];
+          break;
+        }
+    }
+  if (!arm_fpu_desc)
+    error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+
+  switch (arm_fpu_desc->model)
+    {
+    case ARM_FP_MODEL_FPA:
+      if (arm_fpu_desc->rev == 2)
+        arm_fpu_attr = FPU_FPE2;
+      else if (arm_fpu_desc->rev == 3)
+        arm_fpu_attr = FPU_FPE3;
       else
-        arm_fpu_tune = arm_fpu_arch;
-      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
-      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
+        arm_fpu_attr = FPU_FPA;
+      break;
+
+    case ARM_FP_MODEL_MAVERICK:
+      arm_fpu_attr = FPU_MAVERICK;
+      break;
+
+    case ARM_FP_MODEL_VFP:
+      arm_fpu_attr = FPU_VFP;
+      break;
+
+    default:
+      gcc_unreachable();
     }
 
   if (target_float_abi_name != NULL)
@@ -1687,7 +1664,7 @@ arm_override_options (void)
     arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
 
   if (TARGET_AAPCS_BASED
-      && (arm_fp_model == ARM_FP_MODEL_FPA))
+      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
     error ("FPA is unsupported in the AAPCS");
 
   if (TARGET_AAPCS_BASED)
@@ -1715,7 +1692,7 @@ arm_override_options (void)
   /* If soft-float is specified then don't use FPU.  */
   if (TARGET_SOFT_FLOAT)
-    arm_fpu_attr = FPU_NONE;
+    arm_fpu_attr = FPU_NONE;
 
   if (TARGET_AAPCS_BASED)
     {
@@ -1742,8 +1719,7 @@ arm_override_options (void)
   /* For arm2/3 there is no need to do any scheduling if there is only
      a floating point emulator, or we are doing software floating-point.  */
   if ((TARGET_SOFT_FLOAT
-       || arm_fpu_tune == FPUTYPE_FPA_EMU2
-       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
+       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
@@ -13305,7 +13281,7 @@ arm_output_epilogue (rtx sibling)
      /* This variable is for the Virtual Frame Pointer, not VFP regs.
*/ int vfp_offset = offsets->frame; - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -13528,7 +13504,7 @@ arm_output_epilogue (rtx sibling) SP_REGNUM, HARD_FRAME_POINTER_REGNUM); } - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -14254,7 +14230,7 @@ arm_save_coproc_regs(void) /* Save any floating point call-saved registers used by this function. */ - if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + if (TARGET_FPA_EMU2) { for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) @@ -19736,45 +19712,8 @@ arm_file_start (void) } else { - int set_float_abi_attributes = 0; - switch (arm_fpu_arch) - { - case FPUTYPE_FPA: - fpu_name = "fpa"; - break; - case FPUTYPE_FPA_EMU2: - fpu_name = "fpe2"; - break; - case FPUTYPE_FPA_EMU3: - fpu_name = "fpe3"; - break; - case FPUTYPE_MAVERICK: - fpu_name = "maverick"; - break; - case FPUTYPE_VFP: - fpu_name = "vfp"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3D16: - fpu_name = "vfpv3-d16"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_VFP3: - fpu_name = "vfpv3"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON: - fpu_name = "neon"; - set_float_abi_attributes = 1; - break; - case FPUTYPE_NEON_FP16: - fpu_name = "neon-fp16"; - set_float_abi_attributes = 1; - break; - default: - abort(); - } - if (set_float_abi_attributes) + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) { if (TARGET_HARD_FLOAT) asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 9272ca51cba..2dfd22df45c 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -190,9 +190,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) /* Use hardware floating point calling convention. */ #define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) -#define TARGET_FPA (arm_fp_model == ARM_FP_MODEL_FPA) -#define TARGET_MAVERICK (arm_fp_model == ARM_FP_MODEL_MAVERICK) -#define TARGET_VFP (arm_fp_model == ARM_FP_MODEL_VFP) +#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA) +#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) #define TARGET_IWMMXT (arm_arch_iwmmxt) #define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) #define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) @@ -216,6 +216,8 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) /* Thumb-1 only. */ #define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) +/* FPA emulator without LFM. */ +#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently @@ -223,27 +225,21 @@ extern void (*arm_lang_output_object_attributes_hook)(void); to be more careful with TARGET_NEON as noted below. */ /* FPU is has the full VFPv3/NEON register file of 32 D registers. 
*/ -#define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) /* FPU supports VFPv3 instructions. */ -#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_VFP3D16 \ - || TARGET_VFPD32)) +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) /* FPU supports NEON/VFP half-precision floating-point. */ -#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16) +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) /* FPU supports Neon instructions. The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ - && arm_fp_model == ARM_FP_MODEL_VFP \ - && (arm_fpu_arch == FPUTYPE_NEON \ - || arm_fpu_arch == FPUTYPE_NEON_FP16)) + && TARGET_VFP && arm_fpu_desc->neon) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ @@ -300,42 +296,25 @@ enum arm_fp_model ARM_FP_MODEL_VFP }; -extern enum arm_fp_model arm_fp_model; - -/* Which floating point hardware is available. Also update - fp_model_for_fpu in arm.c when adding entries to this list. */ -enum fputype +enum vfp_reg_type { - /* No FP hardware. */ - FPUTYPE_NONE, - /* Full FPA support. */ - FPUTYPE_FPA, - /* Emulated FPA hardware, Issue 2 emulator (no LFM/SFM). */ - FPUTYPE_FPA_EMU2, - /* Emulated FPA hardware, Issue 3 emulator. */ - FPUTYPE_FPA_EMU3, - /* Cirrus Maverick floating point co-processor. */ - FPUTYPE_MAVERICK, - /* VFP. */ - FPUTYPE_VFP, - /* VFPv3-D16. */ - FPUTYPE_VFP3D16, - /* VFPv3. */ - FPUTYPE_VFP3, - /* Neon. */ - FPUTYPE_NEON, - /* Neon with half-precision float extensions. */ - FPUTYPE_NEON_FP16 + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE }; -/* Recast the floating point class to be the floating point attribute. */ -#define arm_fpu_attr ((enum attr_fpu) arm_fpu_tune) - -/* What type of floating point to tune for */ -extern enum fputype arm_fpu_tune; - -/* What type of floating point instructions are available */ -extern enum fputype arm_fpu_arch; +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. */ +extern int arm_fpu_attr; enum float_abi_type { diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index b8bf700242b..52edcbaa17b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -160,7 +160,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). 
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -6770,6 +6770,7 @@ (const_int 6) (const_int 8))))] ) + (define_insn "*movsi_cbranchsi4" [(set (pc) (if_then_else @@ -6833,6 +6834,45 @@ (const_int 10)))))] ) +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 1) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +;; Sigh! This variant shouldn't be needed, but combine often fails to +;; merge cases like this because the op1 is a hard register in +;; CLASS_LIKELY_SPILLED_P. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(parallel + [(set (pc) + (if_then_else (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (set (match_dup 0) (match_dup 1))])] + "" +) + (define_insn "*negated_cbranchsi4" [(set (pc) (if_then_else diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index faaaf7bca39..ccfc7426077 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -61,7 +61,7 @@ typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_sf float32_t; +typedef float float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; @@ -5085,7 +5085,7 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5151,7 +5151,7 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5283,7 +5283,7 @@ vdup_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vdup_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5349,7 +5349,7 @@ vdupq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vdupq_n_f32 (float32_t __a) { - return 
(float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -5415,7 +5415,7 @@ vmov_n_s32 (int32_t __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmov_n_f32 (float32_t __a) { - return (float32x2_t)__builtin_neon_vdup_nv2sf (__a); + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -5481,7 +5481,7 @@ vmovq_n_s32 (int32_t __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmovq_n_f32 (float32_t __a) { - return (float32x4_t)__builtin_neon_vdup_nv4sf (__a); + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -6591,7 +6591,7 @@ vmul_n_s32 (int32x2_t __a, int32_t __b) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmul_n_f32 (float32x2_t __a, float32_t __b) { - return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 3); + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6621,7 +6621,7 @@ vmulq_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmulq_n_f32 (float32x4_t __a, float32_t __b) { - return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 3); + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6735,7 +6735,7 @@ vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6765,7 +6765,7 @@ vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -6831,7 +6831,7 @@ vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) { - return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 3); + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) @@ -6861,7 +6861,7 @@ vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) { - return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 3); + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); } __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) @@ -7851,7 +7851,7 @@ vld1_s64 (const int64_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1v2sf (__a); + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -7917,7 +7917,7 @@ vld1q_s64 (const int64_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1v4sf (__a); + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -7977,7 +7977,7 @@ vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) { - return (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c); + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8043,7 +8043,7 @@ vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) { - return (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c); + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8109,7 +8109,7 @@ vld1_dup_s32 (const int32_t * __a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * __a) { - return (float32x2_t)__builtin_neon_vld1_dupv2sf (__a); + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -8175,7 +8175,7 @@ vld1q_dup_s32 (const int32_t * __a) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * __a) { - return (float32x4_t)__builtin_neon_vld1_dupv4sf (__a); + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -8247,7 +8247,7 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { - __builtin_neon_vst1v2sf (__a, __b); + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8313,7 +8313,7 @@ vst1q_s64 (int64_t * __a, int64x2_t __b) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { - __builtin_neon_vst1v4sf (__a, __b); + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8373,7 +8373,7 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { - __builtin_neon_vst1_lanev2sf (__a, __b, __c); + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf 
*) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8439,7 +8439,7 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { - __builtin_neon_vst1_lanev4sf (__a, __b, __c); + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8512,7 +8512,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2v2sf (__a); + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8600,7 +8600,7 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_f32 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2v4sf (__a); + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8676,7 +8676,7 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8748,7 +8748,7 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -8807,7 +8807,7 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_dup_f32 (const float32_t * __a) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld2_dupv2sf (__a); + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -8892,7 +8892,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_f32 (float32_t * __a, float32x2x2_t __b) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2v2sf (__a, __bu.__o); + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -8969,7 +8969,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_f32 (float32_t * __a, float32x4x2_t __b) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2v4sf (__a, __bu.__o); + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9032,7 +9032,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9088,7 +9088,7 @@ __extension__ 
static __inline void __attribute__ ((__always_inline__)) vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9140,7 +9140,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3v2sf (__a); + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9228,7 +9228,7 @@ __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_f32 (const float32_t * __a) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3v4sf (__a); + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9304,7 +9304,7 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9376,7 +9376,7 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -9435,7 +9435,7 @@ __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_dup_f32 (const float32_t * __a) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld3_dupv2sf (__a); + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9520,7 +9520,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_f32 (float32_t * __a, float32x2x3_t __b) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3v2sf (__a, __bu.__o); + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9597,7 +9597,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_f32 (float32_t * __a, float32x4x3_t __b) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3v4sf (__a, __bu.__o); + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9660,7 +9660,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9716,7 +9716,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const 
int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -9768,7 +9768,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4v2sf (__a); + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9856,7 +9856,7 @@ __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_f32 (const float32_t * __a) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4v4sf (__a); + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -9932,7 +9932,7 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10004,7 +10004,7 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); + __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); return __rv.__i; } @@ -10063,7 +10063,7 @@ __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_dup_f32 (const float32_t * __a) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld4_dupv2sf (__a); + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10148,7 +10148,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_f32 (float32_t * __a, float32x2x4_t __b) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2sf (__a, __bu.__o); + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10225,7 +10225,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_f32 (float32_t * __a, float32x4x4_t __b) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4sf (__a, __bu.__o); + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10288,7 +10288,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) @@ -10344,7 +10344,7 @@ __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - 
__builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); } __extension__ static __inline void __attribute__ ((__always_inline__)) diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index 8d3afbf55fd..ba206022b75 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -30,7 +30,7 @@ /* Section 4.1 of the AAPCS requires the use of VFP format. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" /* TARGET_BIG_ENDIAN_DEFAULT is set in config.gcc for big endian configurations. */ diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md index fcd92b002d7..515de43d28b 100644 --- a/gcc/config/arm/fpa.md +++ b/gcc/config/arm/fpa.md @@ -599,10 +599,10 @@ { default: case 0: return \"mvf%?e\\t%0, %1\"; - case 1: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 1: if (TARGET_FPA_EMU2) return \"ldf%?e\\t%0, %1\"; return \"lfm%?\\t%0, 1, %1\"; - case 2: if (arm_fpu_arch == FPUTYPE_FPA_EMU2) + case 2: if (TARGET_FPA_EMU2) return \"stf%?e\\t%1, %0\"; return \"sfm%?\\t%1, 1, %0\"; } diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h index 07455ee87fd..9fdca414e8e 100644 --- a/gcc/config/arm/linux-elf.h +++ b/gcc/config/arm/linux-elf.h @@ -98,7 +98,7 @@ /* NWFPE always understands FPA instructions. */ #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_FPA_EMU3 +#define FPUTYPE_DEFAULT "fpe3" /* Call the function profiler with a given profile label. */ #undef ARM_FUNCTION_PROFILER diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml index 9c8e2a89b86..112c8be6e3b 100644 --- a/gcc/config/arm/neon-gen.ml +++ b/gcc/config/arm/neon-gen.ml @@ -122,6 +122,7 @@ let rec signed_ctype = function | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) @@ -320,7 +321,7 @@ let deftypes () = typeinfo; Format.print_newline (); (* Extra types not in <stdint.h>. *) - Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef float float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 10393b33ebc..114097d22a7 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -50,7 +50,7 @@ type vectype = T_int8x8 | T_int8x16 | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI + | T_intDI | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). @@ -1693,6 +1693,7 @@ let string_of_vectype vt = | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in affix (Printf.sprintf "%sx%d" basename num) diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h index 4c06fa1cb3b..9cf186b338d 100644 --- a/gcc/config/arm/netbsd-elf.h +++ b/gcc/config/arm/netbsd-elf.h @@ -153,5 +153,5 @@ do \ while (0) #undef FPUTYPE_DEFAULT -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 8879fedb7d7..aa7e197bc5d 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -97,7 +97,7 @@ along with GCC; see the file COPYING3. 
If not see /* There is no default multilib. */ #undef MULTILIB_DEFAULTS -#define FPUTYPE_DEFAULT FPUTYPE_VFP +#define FPUTYPE_DEFAULT "vfp" #undef FUNCTION_PROFILER #define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 0927e3928c1..782ad11627b 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -406,8 +406,6 @@ extern int avr_reg_order[]; #define HAVE_POST_INCREMENT 1 #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #define REG_OK_FOR_BASE_NOSTRICT_P(X) \ diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 03a279036f3..365680ee9fa 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -911,9 +911,6 @@ typedef struct { /* Addressing Modes */ -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Nonzero if the constant value X is a legitimate general operand. symbol_ref are not legitimate and will be put into constant pool. See force_const_mem(). diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index 0fea7d77b39..3c426b74ae5 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -950,8 +950,6 @@ struct cum_args {int regs;}; #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Must be a compile-time constant, so we go with the highest value among all CRIS variants. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h index 20e157173d8..5e6237895b5 100644 --- a/gcc/config/fr30/fr30.h +++ b/gcc/config/fr30/fr30.h @@ -741,16 +741,6 @@ enum reg_class /*}}}*/ /*{{{ Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */ diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h index d48aa1ef17d..d5a7a4a6670 100644 --- a/gcc/config/frv/frv.h +++ b/gcc/config/frv/frv.h @@ -1927,16 +1927,6 @@ __asm__("\n" \ /* Addressing Modes. */ -/* A C expression that is 1 if the RTX X is a constant which is a valid - address. On most machines, this can be defined as `CONSTANT_P (X)', but a - few machines are more restrictive in which constant addresses are supported. - - `CONSTANT_P' accepts integer-values expressions whose values are not - explicitly known, such as `symbol_ref', `label_ref', and `high' expressions - and `const' arithmetic expressions, in addition to `const_int' and - `const_double' expressions. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* A number, the maximum number of registers that can appear in a valid memory address. Note that it is up to you to specify a value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. 
*/ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 49acfa780e4..21f0e3184ef 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -46,9 +46,11 @@ /* Extended Features */ /* %ecx */ +#define bit_FMA4 (1 << 16) #define bit_LAHF_LM (1 << 0) +#define bit_LWP (1 << 15) #define bit_SSE4a (1 << 6) -#define bit_FMA4 (1 << 16) +#define bit_XOP (1 << 11) /* %edx */ #define bit_LM (1 << 29) diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h index 42782ade0ed..2bd411a0f05 100644 --- a/gcc/config/i386/fma4intrin.h +++ b/gcc/config/i386/fma4intrin.h @@ -35,15 +35,6 @@ /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ #include <ammintrin.h> -/* Internal data types for implementing the intrinsics. */ -typedef float __v8sf __attribute__ ((__vector_size__ (32))); -typedef double __v4df __attribute__ ((__vector_size__ (32))); - -typedef float __m256 __attribute__ ((__vector_size__ (32), - __may_alias__)); -typedef double __m256d __attribute__ ((__vector_size__ (32), - __may_alias__)); - /* 128b Floating point multiply/add type instructions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 12a3f1759a8..5a5311fba0f 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -232,6 +232,10 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__SSE4A__"); if (isa_flag & OPTION_MASK_ISA_FMA4) def_or_undef (parse_in, "__FMA4__"); + if (isa_flag & OPTION_MASK_ISA_XOP) + def_or_undef (parse_in, "__XOP__"); + if (isa_flag & OPTION_MASK_ISA_LWP) + def_or_undef (parse_in, "__LWP__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) def_or_undef (parse_in, "__SSE_MATH__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c29a7848ae1..2031dfb6e98 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1964,6 +1964,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_FMA4_SET \ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \ | OPTION_MASK_ISA_AVX_SET) +#define OPTION_MASK_ISA_XOP_SET \ + (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET) +#define OPTION_MASK_ISA_LWP_SET \ + OPTION_MASK_ISA_LWP /* AES and PCLMUL need SSE2 because they use xmm registers */ #define OPTION_MASK_ISA_AES_SET \ @@ -2015,7 +2019,10 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_SSE4A_UNSET \ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET) -#define OPTION_MASK_ISA_FMA4_UNSET OPTION_MASK_ISA_FMA4 +#define OPTION_MASK_ISA_FMA4_UNSET \ + (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET) +#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP +#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL @@ -2263,6 +2270,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mxop: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET; + } + return true; + + case OPT_mlwp: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET; + } + else + { + 
ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET; + } + return true; + case OPT_mabm: if (value) { @@ -2391,6 +2424,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { { "-m64", OPTION_MASK_ISA_64BIT }, { "-mfma4", OPTION_MASK_ISA_FMA4 }, + { "-mxop", OPTION_MASK_ISA_XOP }, + { "-mlwp", OPTION_MASK_ISA_LWP }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -2621,7 +2656,9 @@ override_options (bool main_args_p) PTA_AVX = 1 << 18, PTA_FMA = 1 << 19, PTA_MOVBE = 1 << 20, - PTA_FMA4 = 1 << 21 + PTA_FMA4 = 1 << 21, + PTA_XOP = 1 << 22, + PTA_LWP = 1 << 23 }; static struct pta @@ -2967,6 +3004,12 @@ override_options (bool main_args_p) if (processor_alias_table[i].flags & PTA_FMA4 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) ix86_isa_flags |= OPTION_MASK_ISA_FMA4; + if (processor_alias_table[i].flags & PTA_XOP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) + ix86_isa_flags |= OPTION_MASK_ISA_XOP; + if (processor_alias_table[i].flags & PTA_LWP + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) + ix86_isa_flags |= OPTION_MASK_ISA_LWP; if (processor_alias_table[i].flags & PTA_ABM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) ix86_isa_flags |= OPTION_MASK_ISA_ABM; @@ -3649,6 +3692,8 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) IX86_ATTR_ISA ("sse4a", OPT_msse4a), IX86_ATTR_ISA ("ssse3", OPT_mssse3), IX86_ATTR_ISA ("fma4", OPT_mfma4), + IX86_ATTR_ISA ("xop", OPT_mxop), + IX86_ATTR_ISA ("lwp", OPT_mlwp), /* string options */ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), @@ -11290,6 +11335,7 @@ get_some_local_dynamic_name (void) X -- don't print any sort of PIC '@' suffix for a symbol. & -- print some in-use local-dynamic symbol name. H -- print a memory address offset by 8; used for sse high-parts + Y -- print condition for XOP pcom* instruction. + -- print a branch hint as 'cs' or 'ds' prefix ; -- print a semicolon (after prefixes due to bug in older gas). */ @@ -11707,6 +11753,61 @@ print_operand (FILE *file, rtx x, int code) return; } + case 'Y': + switch (GET_CODE (x)) + { + case NE: + fputs ("neq", file); + break; + case EQ: + fputs ("eq", file); + break; + case GE: + case GEU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); + break; + case GT: + case GTU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? 
"gt" : "unle", file); + break; + case LE: + case LEU: + fputs ("le", file); + break; + case LT: + case LTU: + fputs ("lt", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case ORDERED: + fputs ("ord", file); + break; + case UNEQ: + fputs ("ueq", file); + break; + case UNGE: + fputs ("nlt", file); + break; + case UNGT: + fputs ("nle", file); + break; + case UNLE: + fputs ("ule", file); + break; + case UNLT: + fputs ("ult", file); + break; + case LTGT: + fputs ("une", file); + break; + default: + output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); + return; + } + return; + case ';': #if TARGET_MACHO fputs (" ; ", file); @@ -15916,6 +16017,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) x = gen_rtx_AND (mode, x, op_false); emit_insn (gen_rtx_SET (VOIDmode, dest, x)); } + else if (TARGET_XOP) + { + rtx pcmov = gen_rtx_SET (mode, dest, + gen_rtx_IF_THEN_ELSE (mode, cmp, + op_true, + op_false)); + emit_insn (pcmov); + } else { op_true = force_reg (mode, op_true); @@ -16038,6 +16147,9 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = operands[4]; cop1 = operands[5]; + /* XOP supports all of the comparisons on all vector int types. */ + if (!TARGET_XOP) + { /* Canonicalize the comparison to EQ, GT, GTU. */ switch (code) { @@ -16148,6 +16260,7 @@ ix86_expand_int_vcond (rtx operands[]) cop0 = x; cop1 = CONST0_RTX (mode); } + } x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, operands[1+negate], operands[2-negate]); @@ -20806,7 +20919,7 @@ enum ix86_builtins IX86_BUILTIN_CVTUDQ2PS, - /* FMA4 instructions. */ + /* FMA4 and XOP instructions. */ IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDPS, @@ -20839,6 +20952,164 @@ enum ix86_builtins IX86_BUILTIN_VFNMADDPD256, IX86_BUILTIN_VFNMSUBPS256, IX86_BUILTIN_VFNMSUBPD256, + + IX86_BUILTIN_VPCMOV, + IX86_BUILTIN_VPCMOV_V2DI, + IX86_BUILTIN_VPCMOV_V4SI, + IX86_BUILTIN_VPCMOV_V8HI, + IX86_BUILTIN_VPCMOV_V16QI, + IX86_BUILTIN_VPCMOV_V4SF, + IX86_BUILTIN_VPCMOV_V2DF, + IX86_BUILTIN_VPCMOV256, + IX86_BUILTIN_VPCMOV_V4DI256, + IX86_BUILTIN_VPCMOV_V8SI256, + IX86_BUILTIN_VPCMOV_V16HI256, + IX86_BUILTIN_VPCMOV_V32QI256, + IX86_BUILTIN_VPCMOV_V8SF256, + IX86_BUILTIN_VPCMOV_V4DF256, + + IX86_BUILTIN_VPPERM, + + IX86_BUILTIN_VPMACSSWW, + IX86_BUILTIN_VPMACSWW, + IX86_BUILTIN_VPMACSSWD, + IX86_BUILTIN_VPMACSWD, + IX86_BUILTIN_VPMACSSDD, + IX86_BUILTIN_VPMACSDD, + IX86_BUILTIN_VPMACSSDQL, + IX86_BUILTIN_VPMACSSDQH, + IX86_BUILTIN_VPMACSDQL, + IX86_BUILTIN_VPMACSDQH, + IX86_BUILTIN_VPMADCSSWD, + IX86_BUILTIN_VPMADCSWD, + + IX86_BUILTIN_VPHADDBW, + IX86_BUILTIN_VPHADDBD, + IX86_BUILTIN_VPHADDBQ, + IX86_BUILTIN_VPHADDWD, + IX86_BUILTIN_VPHADDWQ, + IX86_BUILTIN_VPHADDDQ, + IX86_BUILTIN_VPHADDUBW, + IX86_BUILTIN_VPHADDUBD, + IX86_BUILTIN_VPHADDUBQ, + IX86_BUILTIN_VPHADDUWD, + IX86_BUILTIN_VPHADDUWQ, + IX86_BUILTIN_VPHADDUDQ, + IX86_BUILTIN_VPHSUBBW, + IX86_BUILTIN_VPHSUBWD, + IX86_BUILTIN_VPHSUBDQ, + + IX86_BUILTIN_VPROTB, + IX86_BUILTIN_VPROTW, + IX86_BUILTIN_VPROTD, + IX86_BUILTIN_VPROTQ, + IX86_BUILTIN_VPROTB_IMM, + IX86_BUILTIN_VPROTW_IMM, + IX86_BUILTIN_VPROTD_IMM, + IX86_BUILTIN_VPROTQ_IMM, + + IX86_BUILTIN_VPSHLB, + IX86_BUILTIN_VPSHLW, + IX86_BUILTIN_VPSHLD, + IX86_BUILTIN_VPSHLQ, + IX86_BUILTIN_VPSHAB, + IX86_BUILTIN_VPSHAW, + IX86_BUILTIN_VPSHAD, + IX86_BUILTIN_VPSHAQ, + + IX86_BUILTIN_VFRCZSS, + IX86_BUILTIN_VFRCZSD, + IX86_BUILTIN_VFRCZPS, + IX86_BUILTIN_VFRCZPD, + IX86_BUILTIN_VFRCZPS256, + IX86_BUILTIN_VFRCZPD256, + + IX86_BUILTIN_VPCOMEQUB, + 
IX86_BUILTIN_VPCOMNEUB, + IX86_BUILTIN_VPCOMLTUB, + IX86_BUILTIN_VPCOMLEUB, + IX86_BUILTIN_VPCOMGTUB, + IX86_BUILTIN_VPCOMGEUB, + IX86_BUILTIN_VPCOMFALSEUB, + IX86_BUILTIN_VPCOMTRUEUB, + + IX86_BUILTIN_VPCOMEQUW, + IX86_BUILTIN_VPCOMNEUW, + IX86_BUILTIN_VPCOMLTUW, + IX86_BUILTIN_VPCOMLEUW, + IX86_BUILTIN_VPCOMGTUW, + IX86_BUILTIN_VPCOMGEUW, + IX86_BUILTIN_VPCOMFALSEUW, + IX86_BUILTIN_VPCOMTRUEUW, + + IX86_BUILTIN_VPCOMEQUD, + IX86_BUILTIN_VPCOMNEUD, + IX86_BUILTIN_VPCOMLTUD, + IX86_BUILTIN_VPCOMLEUD, + IX86_BUILTIN_VPCOMGTUD, + IX86_BUILTIN_VPCOMGEUD, + IX86_BUILTIN_VPCOMFALSEUD, + IX86_BUILTIN_VPCOMTRUEUD, + + IX86_BUILTIN_VPCOMEQUQ, + IX86_BUILTIN_VPCOMNEUQ, + IX86_BUILTIN_VPCOMLTUQ, + IX86_BUILTIN_VPCOMLEUQ, + IX86_BUILTIN_VPCOMGTUQ, + IX86_BUILTIN_VPCOMGEUQ, + IX86_BUILTIN_VPCOMFALSEUQ, + IX86_BUILTIN_VPCOMTRUEUQ, + + IX86_BUILTIN_VPCOMEQB, + IX86_BUILTIN_VPCOMNEB, + IX86_BUILTIN_VPCOMLTB, + IX86_BUILTIN_VPCOMLEB, + IX86_BUILTIN_VPCOMGTB, + IX86_BUILTIN_VPCOMGEB, + IX86_BUILTIN_VPCOMFALSEB, + IX86_BUILTIN_VPCOMTRUEB, + + IX86_BUILTIN_VPCOMEQW, + IX86_BUILTIN_VPCOMNEW, + IX86_BUILTIN_VPCOMLTW, + IX86_BUILTIN_VPCOMLEW, + IX86_BUILTIN_VPCOMGTW, + IX86_BUILTIN_VPCOMGEW, + IX86_BUILTIN_VPCOMFALSEW, + IX86_BUILTIN_VPCOMTRUEW, + + IX86_BUILTIN_VPCOMEQD, + IX86_BUILTIN_VPCOMNED, + IX86_BUILTIN_VPCOMLTD, + IX86_BUILTIN_VPCOMLED, + IX86_BUILTIN_VPCOMGTD, + IX86_BUILTIN_VPCOMGED, + IX86_BUILTIN_VPCOMFALSED, + IX86_BUILTIN_VPCOMTRUED, + + IX86_BUILTIN_VPCOMEQQ, + IX86_BUILTIN_VPCOMNEQ, + IX86_BUILTIN_VPCOMLTQ, + IX86_BUILTIN_VPCOMLEQ, + IX86_BUILTIN_VPCOMGTQ, + IX86_BUILTIN_VPCOMGEQ, + IX86_BUILTIN_VPCOMFALSEQ, + IX86_BUILTIN_VPCOMTRUEQ, + + /* LWP instructions. */ + IX86_BUILTIN_LLWPCB16, + IX86_BUILTIN_LLWPCB32, + IX86_BUILTIN_LLWPCB64, + IX86_BUILTIN_SLWPCB16, + IX86_BUILTIN_SLWPCB32, + IX86_BUILTIN_SLWPCB64, + IX86_BUILTIN_LWPVAL16, + IX86_BUILTIN_LWPVAL32, + IX86_BUILTIN_LWPVAL64, + IX86_BUILTIN_LWPINS16, + IX86_BUILTIN_LWPINS32, + IX86_BUILTIN_LWPINS64, + IX86_BUILTIN_MAX }; @@ -21052,7 +21323,13 @@ enum ix86_special_builtin_type VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, VOID_FTYPE_PV4SF_V4SF_V4SF, - VOID_FTYPE_PV2DF_V2DF_V2DF + VOID_FTYPE_PV2DF_V2DF_V2DF, + VOID_FTYPE_USHORT_UINT_USHORT, + VOID_FTYPE_UINT_UINT_UINT, + VOID_FTYPE_UINT64_UINT_UINT, + UCHAR_FTYPE_USHORT_UINT_USHORT, + UCHAR_FTYPE_UINT_UINT_UINT, + UCHAR_FTYPE_UINT64_UINT_UINT }; /* Builtin types */ @@ -21299,6 +21576,22 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbhi1, "__builtin_ia32_llwpcb16", IX86_BUILTIN_LLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbsi1, "__builtin_ia32_llwpcb32", IX86_BUILTIN_LLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcbdi1, "__builtin_ia32_llwpcb64", IX86_BUILTIN_LLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbhi1, "__builtin_ia32_slwpcb16", IX86_BUILTIN_SLWPCB16, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, 
CODE_FOR_lwp_slwpcbsi1, "__builtin_ia32_slwpcb32", IX86_BUILTIN_SLWPCB32, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcbdi1, "__builtin_ia32_slwpcb64", IX86_BUILTIN_SLWPCB64, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalhi3, "__builtin_ia32_lwpval16", IX86_BUILTIN_LWPVAL16, UNKNOWN, (int) VOID_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinshi3, "__builtin_ia32_lwpins16", IX86_BUILTIN_LWPINS16, UNKNOWN, (int) UCHAR_FTYPE_USHORT_UINT_USHORT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, + }; /* Builtins with variable number of arguments. */ @@ -21912,13 +22205,58 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, }; -/* FMA4. */ +/* FMA4 and XOP. */ enum multi_arg_type { MULTI_ARG_UNKNOWN, MULTI_ARG_3_SF, MULTI_ARG_3_DF, MULTI_ARG_3_SF2, - MULTI_ARG_3_DF2 + MULTI_ARG_3_DF2, + MULTI_ARG_3_DI, + MULTI_ARG_3_SI, + MULTI_ARG_3_SI_DI, + MULTI_ARG_3_HI, + MULTI_ARG_3_HI_SI, + MULTI_ARG_3_QI, + MULTI_ARG_3_DI2, + MULTI_ARG_3_SI2, + MULTI_ARG_3_HI2, + MULTI_ARG_3_QI2, + MULTI_ARG_2_SF, + MULTI_ARG_2_DF, + MULTI_ARG_2_DI, + MULTI_ARG_2_SI, + MULTI_ARG_2_HI, + MULTI_ARG_2_QI, + MULTI_ARG_2_DI_IMM, + MULTI_ARG_2_SI_IMM, + MULTI_ARG_2_HI_IMM, + MULTI_ARG_2_QI_IMM, + MULTI_ARG_2_DI_CMP, + MULTI_ARG_2_SI_CMP, + MULTI_ARG_2_HI_CMP, + MULTI_ARG_2_QI_CMP, + MULTI_ARG_2_DI_TF, + MULTI_ARG_2_SI_TF, + MULTI_ARG_2_HI_TF, + MULTI_ARG_2_QI_TF, + MULTI_ARG_2_SF_TF, + MULTI_ARG_2_DF_TF, + MULTI_ARG_1_SF, + MULTI_ARG_1_DF, + MULTI_ARG_1_SF2, + MULTI_ARG_1_DF2, + MULTI_ARG_1_DI, + MULTI_ARG_1_SI, + MULTI_ARG_1_HI, + MULTI_ARG_1_QI, + MULTI_ARG_1_SI_DI, + MULTI_ARG_1_HI_DI, + MULTI_ARG_1_HI_SI, + MULTI_ARG_1_QI_DI, + MULTI_ARG_1_QI_SI, + MULTI_ARG_1_QI_HI + }; static const struct builtin_description bdesc_multi_arg[] = @@ -21959,7 +22297,160 @@ static const struct builtin_description bdesc_multi_arg[] = { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, - { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 } + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", 
IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", 
IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, 
"__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, }; @@ -22341,51 +22832,6 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); - - tree v2di_ftype_v2di - = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); - - tree v16qi_ftype_v8hi_v8hi - = build_function_type_list (V16QI_type_node, - V8HI_type_node, V8HI_type_node, - NULL_TREE); - tree v8hi_ftype_v4si_v4si - = build_function_type_list (V8HI_type_node, - V4SI_type_node, V4SI_type_node, - NULL_TREE); - tree v8hi_ftype_v16qi_v16qi - = build_function_type_list (V8HI_type_node, - V16QI_type_node, V16QI_type_node, - NULL_TREE); - tree v4hi_ftype_v8qi_v8qi - = build_function_type_list (V4HI_type_node, - V8QI_type_node, V8QI_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_uchar - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_char_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_ushort - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_unsigned - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - tree uint64_ftype_uint64_uint64 - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - NULL_TREE); - tree float_ftype_float - = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); - /* AVX builtins */ tree V32QI_type_node = build_vector_type_for_mode (char_type_node, V32QImode); @@ -22397,6 +22843,8 @@ ix86_init_mmx_sse_builtins (void) V4DImode); tree V4DF_type_node = build_vector_type_for_mode (double_type_node, V4DFmode); + tree V16HI_type_node = build_vector_type_for_mode (intHI_type_node, + V16HImode); tree v8sf_ftype_v8sf = build_function_type_list (V8SF_type_node, V8SF_type_node, @@ -22641,6 +23089,138 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DI_type_node, NULL_TREE); + /* XOP instructions */ + tree v2di_ftype_v2di_v2di_v2di + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v4di_ftype_v4di_v4di_v4di + = build_function_type_list (V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + V4DI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v4si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v8si_ftype_v8si_v8si_v8si + = build_function_type_list (V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + V8SI_type_node, + NULL_TREE); + + tree 
v32qi_ftype_v32qi_v32qi_v32qi + = build_function_type_list (V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + V32QI_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_v4si_v2di + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + V4SI_type_node, + V2DI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v8hi + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + NULL_TREE); + + tree v16hi_ftype_v16hi_v16hi_v16hi + = build_function_type_list (V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + V16HI_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_v8hi_v4si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + V8HI_type_node, + V4SI_type_node, + NULL_TREE); + + tree v2di_ftype_v2di_si + = build_function_type_list (V2DI_type_node, + V2DI_type_node, + integer_type_node, + NULL_TREE); + + tree v4si_ftype_v4si_si + = build_function_type_list (V4SI_type_node, + V4SI_type_node, + integer_type_node, + NULL_TREE); + + tree v8hi_ftype_v8hi_si + = build_function_type_list (V8HI_type_node, + V8HI_type_node, + integer_type_node, + NULL_TREE); + + tree v16qi_ftype_v16qi_si + = build_function_type_list (V16QI_type_node, + V16QI_type_node, + integer_type_node, + NULL_TREE); + + tree v2di_ftype_v2di + = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); + + tree v16qi_ftype_v8hi_v8hi + = build_function_type_list (V16QI_type_node, + V8HI_type_node, V8HI_type_node, + NULL_TREE); + tree v8hi_ftype_v4si_v4si + = build_function_type_list (V8HI_type_node, + V4SI_type_node, V4SI_type_node, + NULL_TREE); + tree v8hi_ftype_v16qi_v16qi + = build_function_type_list (V8HI_type_node, + V16QI_type_node, V16QI_type_node, + NULL_TREE); + tree v4hi_ftype_v8qi_v8qi + = build_function_type_list (V4HI_type_node, + V8QI_type_node, V8QI_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_uchar + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_char_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_ushort + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + tree unsigned_ftype_unsigned_unsigned + = build_function_type_list (unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree uint64_ftype_uint64_uint64 + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + tree float_ftype_float + = build_function_type_list (float_type_node, + float_type_node, + NULL_TREE); + /* Integer intrinsics. */ tree uint64_ftype_void = build_function_type (long_long_unsigned_type_node, @@ -22670,6 +23250,50 @@ ix86_init_mmx_sse_builtins (void) integer_type_node, NULL_TREE); + /* LWP instructions. 
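The type nodes built just below give these builtins their C-level signatures; read together with the bdesc_special_args rows above, the user-visible prototypes come out as, for example (a sketch inferred from the VOID_FTYPE_ and UCHAR_FTYPE_ type codes in this patch, not a separately documented API): void __builtin_ia32_lwpval32 (unsigned int, unsigned int, unsigned int); unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int); the lwpins forms return the instruction's flag result as an unsigned char. 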
*/ + + tree void_ftype_ushort_unsigned_ushort + = build_function_type_list (void_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree void_ftype_unsigned_unsigned_unsigned + = build_function_type_list (void_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree void_ftype_uint64_unsigned_unsigned + = build_function_type_list (void_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_ushort_unsigned_ushort + = build_function_type_list (unsigned_char_type_node, + short_unsigned_type_node, + unsigned_type_node, + short_unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_unsigned_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_uint64_unsigned_unsigned + = build_function_type_list (unsigned_char_type_node, + long_long_unsigned_type_node, + unsigned_type_node, + unsigned_type_node, + NULL_TREE); + tree ftype; /* Add all special builtins with variable number of operands. */ @@ -22783,6 +23407,25 @@ ix86_init_mmx_sse_builtins (void) case VOID_FTYPE_PV2DF_V2DF_V2DF: type = void_ftype_pv2df_v2df_v2df; break; + case VOID_FTYPE_USHORT_UINT_USHORT: + type = void_ftype_ushort_unsigned_ushort; + break; + case VOID_FTYPE_UINT_UINT_UINT: + type = void_ftype_unsigned_unsigned_unsigned; + break; + case VOID_FTYPE_UINT64_UINT_UINT: + type = void_ftype_uint64_unsigned_unsigned; + break; + case UCHAR_FTYPE_USHORT_UINT_USHORT: + type = uchar_ftype_ushort_unsigned_ushort; + break; + case UCHAR_FTYPE_UINT_UINT_UINT: + type = uchar_ftype_unsigned_unsigned_unsigned; + break; + case UCHAR_FTYPE_UINT64_UINT_UINT: + type = uchar_ftype_uint64_unsigned_unsigned; + break; + default: gcc_unreachable (); } @@ -23409,6 +24052,50 @@ ix86_init_mmx_sse_builtins (void) case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; case MULTI_ARG_3_SF2: mtype = v8sf_ftype_v8sf_v8sf_v8sf; break; case MULTI_ARG_3_DF2: mtype = v4df_ftype_v4df_v4df_v4df; break; + case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; + case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; + case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; + case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; + case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; + case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; + case MULTI_ARG_3_DI2: mtype = v4di_ftype_v4di_v4di_v4di; break; + case MULTI_ARG_3_SI2: mtype = v8si_ftype_v8si_v8si_v8si; break; + case MULTI_ARG_3_HI2: mtype = v16hi_ftype_v16hi_v16hi_v16hi; break; + case MULTI_ARG_3_QI2: mtype = v32qi_ftype_v32qi_v32qi_v32qi; break; + case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; + case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; + case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; + case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; + case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_CMP: mtype = 
v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; + case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; + case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; + case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; + case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; + case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; + case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; + case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; + case MULTI_ARG_1_SF2: mtype = v8sf_ftype_v8sf; break; + case MULTI_ARG_1_DF2: mtype = v4df_ftype_v4df; break; + case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; + case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; + case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; + case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; + case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; + case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; + case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; + case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; + case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; + case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; case MULTI_ARG_UNKNOWN: default: @@ -23628,9 +24315,71 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, case MULTI_ARG_3_DF: case MULTI_ARG_3_SF2: case MULTI_ARG_3_DF2: + case MULTI_ARG_3_DI: + case MULTI_ARG_3_SI: + case MULTI_ARG_3_SI_DI: + case MULTI_ARG_3_HI: + case MULTI_ARG_3_HI_SI: + case MULTI_ARG_3_QI: + case MULTI_ARG_3_DI2: + case MULTI_ARG_3_SI2: + case MULTI_ARG_3_HI2: + case MULTI_ARG_3_QI2: nargs = 3; break; + case MULTI_ARG_2_SF: + case MULTI_ARG_2_DF: + case MULTI_ARG_2_DI: + case MULTI_ARG_2_SI: + case MULTI_ARG_2_HI: + case MULTI_ARG_2_QI: + nargs = 2; + break; + + case MULTI_ARG_2_DI_IMM: + case MULTI_ARG_2_SI_IMM: + case MULTI_ARG_2_HI_IMM: + case MULTI_ARG_2_QI_IMM: + nargs = 2; + last_arg_constant = true; + break; + + case MULTI_ARG_1_SF: + case MULTI_ARG_1_DF: + case MULTI_ARG_1_SF2: + case MULTI_ARG_1_DF2: + case MULTI_ARG_1_DI: + case MULTI_ARG_1_SI: + case MULTI_ARG_1_HI: + case MULTI_ARG_1_QI: + case MULTI_ARG_1_SI_DI: + case MULTI_ARG_1_HI_DI: + case MULTI_ARG_1_HI_SI: + case MULTI_ARG_1_QI_DI: + case MULTI_ARG_1_QI_SI: + case MULTI_ARG_1_QI_HI: + nargs = 1; + break; + + case MULTI_ARG_2_DI_CMP: + case MULTI_ARG_2_SI_CMP: + case MULTI_ARG_2_HI_CMP: + case MULTI_ARG_2_QI_CMP: + nargs = 2; + comparison_p = true; + break; + + case MULTI_ARG_2_SF_TF: + case MULTI_ARG_2_DF_TF: + case MULTI_ARG_2_DI_TF: + case MULTI_ARG_2_SI_TF: + case MULTI_ARG_2_HI_TF: + case MULTI_ARG_2_QI_TF: + nargs = 2; + tf_p = true; + break; + case MULTI_ARG_UNKNOWN: default: gcc_unreachable (); @@ -24568,6 +25317,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, /* Reserve memory operand for target. */ memory = ARRAY_SIZE (args); break; + case VOID_FTYPE_USHORT_UINT_USHORT: + case VOID_FTYPE_UINT_UINT_UINT: + case VOID_FTYPE_UINT64_UINT_UINT: + case UCHAR_FTYPE_USHORT_UINT_USHORT: + case UCHAR_FTYPE_UINT_UINT_UINT: + case UCHAR_FTYPE_UINT64_UINT_UINT: + nargs = 3; + klass = store; + memory = 0; + break; default: gcc_unreachable (); } @@ -25311,7 +26070,7 @@ static tree ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool sqrt ATTRIBUTE_UNUSED) { - if (! 
(TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () + if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations)) return NULL_TREE; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b412604dbd8..4bc8ef18500 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -55,6 +55,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_FMA OPTION_ISA_FMA #define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_FMA4 OPTION_ISA_FMA4 +#define TARGET_XOP OPTION_ISA_XOP +#define TARGET_LWP OPTION_ISA_LWP #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM #define TARGET_POPCNT OPTION_ISA_POPCNT diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dc605abde06..82f5352597c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -57,6 +57,7 @@ ;; X -- don't print any sort of PIC '@' suffix for a symbol. ;; & -- print some in-use local-dynamic symbol name. ;; H -- print a memory address offset by 8; used for sse high-parts +;; Y -- print condition for XOP pcom* instruction. ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). @@ -199,6 +200,15 @@ (UNSPEC_FMA4_INTRINSIC 150) (UNSPEC_FMA4_FMADDSUB 151) (UNSPEC_FMA4_FMSUBADD 152) + (UNSPEC_XOP_UNSIGNED_CMP 151) + (UNSPEC_XOP_TRUEFALSE 152) + (UNSPEC_XOP_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_LLWP_INTRINSIC 155) + (UNSPEC_SLWP_INTRINSIC 156) + (UNSPECV_LWPVAL_INTRINSIC 157) + (UNSPECV_LWPINS_INTRINSIC 158) + ; For AES support (UNSPEC_AESENC 159) (UNSPEC_AESENCLAST 160) @@ -254,6 +264,20 @@ (COM_TRUE_P 5) ]) +;; Constants used in the XOP pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + ;; Registers by name. (define_constants [(AX_REG 0) @@ -333,7 +357,7 @@ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, - ssemuladd,sse4arg, + ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -19676,6 +19700,20 @@ [(set_attr "type" "fcmov") (set_attr "mode" "XF")]) +;; All moves in XOP pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; XOP conditional move +(define_insn "*xop_pcmov_<mode>" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "register_operand" "x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + ;; These versions of the min/max patterns are intentionally ignorant of ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator @@ -19985,6 +20023,18 @@ DONE; }) +;; Use IOR for stack probes, this is shorter. +(define_expand "probe_stack" + [(match_operand 0 "memory_operand" "")] + "" +{ + if (GET_MODE (operands[0]) == DImode) + emit_insn (gen_iordi3 (operands[0], operands[0], const0_rtx)); + else + emit_insn (gen_iorsi3 (operands[0], operands[0], const0_rtx)); + DONE; +}) + (define_expand "builtin_setjmp_receiver" [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" @@ -20488,7 +20538,9 @@ [(match_dup 0) (match_operand:SI 1 "nonmemory_operand" "")])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) @@ -20503,7 +20555,9 @@ [(match_operand:SI 1 "nonmemory_operand" "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] - "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE + /* Do not split stack checking probes. */ + && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) @@ -21252,19 +21306,19 @@ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" ""))) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) - (clobber (reg:TI 27)) - (clobber (reg:TI 28)) - (clobber (reg:TI 45)) - (clobber (reg:TI 46)) - (clobber (reg:TI 47)) - (clobber (reg:TI 48)) - (clobber (reg:TI 49)) - (clobber (reg:TI 50)) - (clobber (reg:TI 51)) - (clobber (reg:TI 52)) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) (clobber (reg:DI SI_REG)) (clobber (reg:DI DI_REG))] - "!SIBLING_CALL_P (insn) && TARGET_64BIT" + "TARGET_64BIT && !SIBLING_CALL_P (insn)" { if (constant_call_address_operand (operands[1], Pmode)) return "call\t%P1"; @@ -21303,14 +21357,14 @@ (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -21320,14 +21374,14 @@ (define_insn "*sse_prologue_save_insn" [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI 21) - (reg:DI 22) - (reg:DI 23) - (reg:DI 24) - (reg:DI 25) - (reg:DI 26) - (reg:DI 27) - (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (unspec:BLK [(reg:DI XMM0_REG) + (reg:DI XMM1_REG) + (reg:DI XMM2_REG) + (reg:DI XMM3_REG) + (reg:DI XMM4_REG) + (reg:DI XMM5_REG) + (reg:DI XMM6_REG) + (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 
"register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] @@ -21804,6 +21858,120 @@ [(set_attr "type" "other") (set_attr "length" "3")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; LWP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "lwp_llwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_llwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_llwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_LLWP_INTRINSIC)] + "TARGET_LWP" + "llwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_slwpcbhi1" + [(unspec [(match_operand:HI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_slwpcbsi1" + [(unspec [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_slwpcbdi1" + [(unspec [(match_operand:DI 0 "register_operand" "r")] + UNSPEC_SLWP_INTRINSIC)] + "TARGET_LWP" + "slwpcb\t%0" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpvalhi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpvalsi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpvaldi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPVAL_INTRINSIC)] + "TARGET_LWP" + "lwpval\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + +(define_insn "lwp_lwpinshi3" + [(unspec_volatile [(match_operand:HI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:HI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "HI")]) + +(define_insn "lwp_lwpinssi3" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "SI")]) + +(define_insn "lwp_lwpinsdi3" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_int_operand" "")] + UNSPECV_LWPINS_INTRINSIC)] + "TARGET_LWP" + "lwpins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "lwp") + (set_attr "mode" "DI")]) + (include 
"mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9668ff6504d..dd47b7d1dc5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -314,6 +314,14 @@ mfma4 Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save Support FMA4 built-in functions and code generation +mxop +Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save +Support XOP built-in functions and code generation + +mlwp +Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save +Support LWP built-in functions and code generation + mabm Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Support code generation of Advanced Bit Manipulation (ABM) instructions. diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index e701b19e2a8..540bc3f09ee 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -49,6 +49,7 @@ __bswapd (int __X) return __builtin_bswap32 (__X); } +#ifdef __SSE4_2__ /* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */ extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -70,6 +71,7 @@ __crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); } +#endif /* SSE4.2 */ /* 32bit popcnt */ extern __inline int diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h index 5e2e0136fcb..5d8e5ad2cbe 100644 --- a/gcc/config/i386/linux.h +++ b/gcc/config/i386/linux.h @@ -207,6 +207,9 @@ along with GCC; see the file COPYING3. If not see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index cfa3f49e870..d07547a804f 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -110,6 +110,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h" +/* The stack pointer needs to be moved while checking the stack. */ +#define STACK_CHECK_MOVING_SP 1 + /* This macro may be overridden in i386/k*bsd-gnu.h. */ #define REG_NAME(reg) reg diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h new file mode 100644 index 00000000000..e5137ec24f4 --- /dev/null +++ b/gcc/config/i386/lwpintrin.h @@ -0,0 +1,109 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _LWPINTRIN_H_INCLUDED +#define _LWPINTRIN_H_INCLUDED + +#ifndef __LWP__ +# error "LWP instruction set not enabled" +#else + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb16 (void *pcbAddress) +{ + __builtin_ia32_llwpcb16 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb32 (void *pcbAddress) +{ + __builtin_ia32_llwpcb32 (pcbAddress); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__llwpcb64 (void *pcbAddress) +{ + __builtin_ia32_llwpcb64 (pcbAddress); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb16 (void) +{ + return __builtin_ia32_slwpcb16 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb32 (void) +{ + return __builtin_ia32_slwpcb32 (); +} + +extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__slwpcb64 (void) +{ + return __builtin_ia32_slwpcb64 (); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + __builtin_ia32_lwpval16 (data2, data1, flags); +} +/* +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval32 (data2, data1, flags); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpval64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + __builtin_ia32_lwpval64 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins16 (unsigned short data2, unsigned int data1, unsigned short flags) +{ + return __builtin_ia32_lwpins16 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins32 (data2, data1, flags); +} + +extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__lwpins64 (unsigned __int64 data2, unsigned int data1, unsigned int flags) +{ + return __builtin_ia32_lwpins64 (data2, data1, flags); +} +*/ +#endif /* __LWP__ */ + +#endif /* _LWPINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e90296512ad..bad39bb69c8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -86,6 +86,9 @@ (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) +;; Mapping of the max integer size for xop rotate immediate constraint +(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) + ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") (V16QI "QI") (V8HI "HI") @@ -1455,7 +1458,8 @@ (match_operator:SSEMODEF4 3 "sse_comparison_operator" [(match_operand:SSEMODEF4 1 "register_operand" "0") (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" + "!TARGET_XOP + && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" 
"cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -5614,7 +5618,7 @@ (match_operand:V4SI 2 "register_operand" "")))] "TARGET_SSE2" { - if (TARGET_SSE4_1) + if (TARGET_SSE4_1 || TARGET_XOP) ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); }) @@ -5643,7 +5647,7 @@ [(set (match_operand:V4SI 0 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE4_1 + "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP && can_create_pseudo_p ()" "#" "&& 1" @@ -5705,6 +5709,42 @@ rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx op0, op1, op2; + if (TARGET_XOP) + { + /* op1: A,B,C,D, op2: E,F,G,H */ + op0 = operands[0]; + op1 = gen_lowpart (V4SImode, operands[1]); + op2 = gen_lowpart (V4SImode, operands[2]); + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + t4 = gen_reg_rtx (V2DImode); + t5 = gen_reg_rtx (V2DImode); + + /* t1: B,A,D,C */ + emit_insn (gen_sse2_pshufd_1 (t1, op1, + GEN_INT (1), + GEN_INT (0), + GEN_INT (3), + GEN_INT (2))); + + /* t2: 0 */ + emit_move_insn (t2, CONST0_RTX (V4SImode)); + + /* t3: (B*E),(A*F),(D*G),(C*H) */ + emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2)); + + /* t4: (B*E)+(A*F), (D*G)+(C*H) */ + emit_insn (gen_xop_phadddq (t4, t3)); + + /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ + emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); + + /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ + emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5)); + DONE; + } + op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; @@ -5820,6 +5860,56 @@ DONE; }) +(define_expand "vec_widen_smult_hi_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); + DONE; +}) + +(define_expand "vec_widen_smult_lo_v4si" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx t1, t2; + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_pshufd_1 (t1, operands[1], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, operands[2], + GEN_INT (0), + GEN_INT (2), + GEN_INT (1), + GEN_INT (3))); + emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); + DONE; +}) + (define_expand "vec_widen_umult_hi_v4si" [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") @@ -6217,7 +6307,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP " "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*avx_eq<mode>3" @@ -6240,7 +6330,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE2 && !TARGET_XOP && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") @@ -6286,7 +6376,7 @@ (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_XOP" "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "1") @@ -10364,6 +10454,1445 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; XOP instructions +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; XOP parallel integer multiply/add instructions. +;; Note the instruction does not allow the value being added to be a memory +;; operation. However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (plus:V8HI + (mult:V8HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") + (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsww with two memory operands into a load and the pmacsww. +(define_split + [(set (match_operand:V8HI 0 "register_operand" "") + (plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") + (match_operand:V8HI 2 "nonimmediate_operand" "")) + (match_operand:V8HI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V8HImode); + emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssww" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (ss_plus:V8HI + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V8HI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Note the instruction does not allow the value being added to be a memory +;; operation. 
However by pretending via the nonimmediate_operand predicate +;; that it does and splitting it later allows the following to be recognized: +;; a[i] = b[i] * c[i] + d[i]; +(define_insn "xop_pmacsdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" + "@ + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; Split pmacsdd with two memory operands into a load and the pmacsdd. +(define_split + [(set (match_operand:V4SI 0 "register_operand" "") + (plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:V4SI 2 "nonimmediate_operand" "")) + (match_operand:V4SI 3 "nonimmediate_operand" "")))] + "TARGET_XOP + && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) + && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) + && !reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2]) + && !reg_mentioned_p (operands[0], operands[3])" + [(const_int 0)] +{ + ix86_expand_fma4_multiple_memory (operands, 4, V4SImode); + emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_insn "xop_pmacssdd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)]))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacssdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (ss_plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn 
"xop_pmacsdql" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdql_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as operands 1/2 +(define_insn_and_split "xop_mulv2div2di3_low" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacsdqh" + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn_and_split "*xop_pmacsdqh_mem" + [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2)])))) + (match_operand:V2DI 3 "memory_operand" "m,m,m")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))]) + +;; We don't have a straight 32-bit parallel multiply and extend on XOP, so +;; fake it with a multiply/add. 
In general, we expect the define_split to +;; occur before register allocation, so we have to handle the corner case where +;; the target is the same as either operands[1] or operands[2]. +(define_insn_and_split "xop_mulv2div2di3_high" + [(set (match_operand:V2DI 0 "register_operand" "=&x") + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)])))))] + "TARGET_XOP" + "#" + "&& (reload_completed + || (!reg_mentioned_p (operands[0], operands[1]) + && !reg_mentioned_p (operands[0], operands[2])))" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 2) + (parallel [(const_int 0) + (const_int 2)])))) + (match_dup 0)))] +{ + operands[3] = CONST0_RTX (V2DImode); +} + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel integer multiply/add instructions for the intrinsics +(define_insn "xop_pmacsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmacswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcsswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (ss_plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) 
+ (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +(define_insn "xop_pmadcswd" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (plus:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))) + (match_operand:V4SI 3 "register_operand" "x,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" + "@ + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "TI")]) + +;; XOP parallel XMM conditional moves +(define_insn "xop_pcmov_<mode>" + [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x") + (if_then_else:SSEMODE + (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,x") + (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +(define_insn "xop_pcmov_<mode>256" + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") + (if_then_else:AVX256MODE + (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,xm") + (match_operand:AVX256MODE 1 "vector_move_operand" "x,xm,x") + (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "@ + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} + vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg")]) + +;; XOP horizontal add/subtract instructions +(define_insn "xop_phaddbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + 
(sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (sign_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddbd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddbq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (sign_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddbq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphaddwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (sign_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphaddwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadddq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphadddq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI + (zero_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (zero_extend:V8HI + 
(vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphaddubw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4) + (const_int 8) + (const_int 12)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5) + (const_int 9) + (const_int 13)])))) + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6) + (const_int 10) + (const_int 14)]))) + (zero_extend:V4SI + (vec_select:V4QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7) + (const_int 11) + (const_int 15)]))))))] + "TARGET_XOP" + "vphaddubd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddubq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))) + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 8) + (const_int 12)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 9) + (const_int 13)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 10) + (const_int 14)]))) + (zero_extend:V2DI + (vec_select:V2QI + (match_dup 1) + (parallel [(const_int 11) + (const_int 15)])))))))] + "TARGET_XOP" + "vphaddubq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (zero_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (zero_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphadduwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phadduwq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 4)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 5)])))) + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 2) + (const_int 6)]))) + (zero_extend:V2DI + (vec_select:V2HI + (match_dup 1) + (parallel [(const_int 3) + (const_int 7)]))))))] + "TARGET_XOP" + "vphadduwq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phaddudq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 
2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphaddudq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubbw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (minus:V8HI + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)])))))] + "TARGET_XOP" + "vphsubbw\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (minus:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))))] + "TARGET_XOP" + "vphsubwd\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +(define_insn "xop_phsubdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (minus:V2DI + (sign_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2)]))) + (sign_extend:V2DI + (vec_select:V2SI + (match_dup 1) + (parallel [(const_int 1) + (const_int 3)])))))] + "TARGET_XOP" + "vphsubdq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseiadd1")]) + +;; XOP permute instructions +(define_insn "xop_pperm" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (unspec:V16QI + [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,xm") + (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP pack instructions that combine two vectors into a smaller vector +(define_insn "xop_pperm_pack_v2di_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + (vec_concat:V4SI + (truncate:V2SI + (match_operand:V2DI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V2SI + (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v4si_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + (vec_concat:V8HI + (truncate:V4HI + (match_operand:V4SI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V4HI + (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +(define_insn "xop_pperm_pack_v8hi_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + (vec_concat:V16QI + (truncate:V8QI + (match_operand:V8HI 1 "nonimmediate_operand" "x,x,xm")) + (truncate:V8QI + 
(match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + +;; XOP packed rotate instructions +(define_expand "rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; + } +}) + +(define_expand "rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand")))] + "TARGET_XOP" +{ + /* If we were given a scalar, convert it to parallel */ + if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (<ssescalarnum>); + rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); + rtx neg = gen_reg_rtx (<MODE>mode); + rtx reg = gen_reg_rtx (<MODE>mode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != <ssescalarmode>mode) + { + op2 = gen_reg_rtx (<ssescalarmode>mode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < <ssescalarnum>; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_init<mode> (reg, par)); + emit_insn (gen_neg<mode>2 (neg, reg)); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); + DONE; + } +}) + +(define_insn "xop_rotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_rotr<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (rotatert:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] + "TARGET_XOP" +{ + operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); + return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; +} + [(set_attr "type" "sseishft") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_expand "vrotr<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") + (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx reg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (reg, operands[2])); + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "vrotl<mode>3" + [(match_operand:SSEMODE1248 0 "register_operand" "") + (match_operand:SSEMODE1248 1 "register_operand" "") 
+ (match_operand:SSEMODE1248 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_vrotl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (rotate:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (rotatert:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; XOP packed shift instructions. +;; FIXME: add V2DI back in +(define_expand "vlshr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashr<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + rtx neg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_neg<mode>2 (neg, operands[2])); + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg)); + DONE; +}) + +(define_expand "vashl<mode>3" + [(match_operand:SSEMODE124 0 "register_operand" "") + (match_operand:SSEMODE124 1 "register_operand" "") + (match_operand:SSEMODE124 2 "register_operand" "")] + "TARGET_XOP" +{ + emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "xop_ashl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (ashiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +(define_insn "xop_lshl<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") + (if_then_else:SSEMODE1248 + (ge:SSEMODE1248 + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") + (const_int 0)) + (ashift:SSEMODE1248 + (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") + (match_dup 2)) + (lshiftrt:SSEMODE1248 + (match_dup 1) + (neg:SSEMODE1248 (match_dup 2)))))] + "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "mode" "TI")]) + +;; SSE2 doesn't have some shift variants, so define versions for XOP +(define_expand "ashlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL 
(V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "lshlv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = operands[2]; + + emit_insn (gen_vec_initv16qi (reg, par)); + emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg)); + DONE; +}) + +(define_expand "ashrv16qi3" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + int i; + rtx ele = ((CONST_INT_P (operands[2])) + ? GEN_INT (- INTVAL (operands[2])) + : operands[2]); + + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = ele; + + emit_insn (gen_vec_initv16qi (reg, par)); + + if (!CONST_INT_P (operands[2])) + { + rtx neg = gen_reg_rtx (V16QImode); + emit_insn (gen_negv16qi2 (neg, reg)); + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg)); + } + else + emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); + + DONE; +}) + +(define_expand "ashrv2di3" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:V2DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")] + "TARGET_XOP" +{ + rtvec vs = rtvec_alloc (2); + rtx par = gen_rtx_PARALLEL (V2DImode, vs); + rtx reg = gen_reg_rtx (V2DImode); + rtx ele; + + if (CONST_INT_P (operands[2])) + ele = GEN_INT (- INTVAL (operands[2])); + else if (GET_MODE (operands[2]) != DImode) + { + rtx move = gen_reg_rtx (DImode); + ele = gen_reg_rtx (DImode); + convert_move (move, operands[2], false); + emit_insn (gen_negdi2 (ele, move)); + } + else + { + ele = gen_reg_rtx (DImode); + emit_insn (gen_negdi2 (ele, operands[2])); + } + + RTVEC_ELT (vs, 0) = ele; + RTVEC_ELT (vs, 1) = ele; + emit_insn (gen_vec_initv2di (reg, par)); + emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg)); + DONE; +}) + +;; XOP FRCZ support +;; parallel insns +(define_insn "xop_frcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +;; scalar insns +(define_insn "xop_vmfrcz<mode>2" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") + (vec_merge:SSEMODEF2P + (unspec:SSEMODEF2P + [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ) + (match_operand:SSEMODEF2P 1 "register_operand" "0") + (const_int 1)))] + "TARGET_XOP" + "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_frcz<mode>2256" + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x") + (unspec:FMA4MODEF4 + [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")] + UNSPEC_FRCZ))] + "TARGET_XOP" + "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt1") + (set_attr "mode" "<MODE>")]) + +(define_insn "xop_maskcmp<mode>3" + [(set 
(match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "xop_maskcmp_uns<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_rep" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Version of pcom*u* that is called from the intrinsics; it allows pcomequ* +;; and pcomneu* not to be converted to the signed equivalents in case somebody +;; needs the exact instruction generated for the intrinsic. +(define_insn "xop_maskcmp_uns2<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" + [(match_operand:SSEMODE1248 2 "register_operand" "x") + (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] + UNSPEC_XOP_UNSIGNED_CMP))] + "TARGET_XOP" + "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;; Pcomtrue and pcomfalse support. These are useless instructions, but are +;; included for completeness. +(define_insn "xop_pcom_tf<mode>3" + [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") + (unspec:SSEMODE1248 + [(match_operand:SSEMODE1248 1 "register_operand" "x") + (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_int_operand" "n")] + UNSPEC_XOP_TRUEFALSE))] + "TARGET_XOP" +{ + return ((INTVAL (operands[3]) != 0) + ? 
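/* a nonzero immediate selects the always-true form */ 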
"vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" + : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); +} + [(set_attr "type" "ssecmp") + (set_attr "prefix_data16" "0") + (set_attr "prefix_extra" "2") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "*avx_aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 7bc47f8f15d..ac7e21fd6f7 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -54,10 +54,6 @@ #include <smmintrin.h> #endif -#ifdef __FMA4__ -#include <fma4intrin.h> -#endif - #if defined (__AES__) || defined (__PCLMUL__) #include <wmmintrin.h> #endif @@ -69,4 +65,16 @@ #include <mm3dnow.h> #endif +#ifdef __FMA4__ +#include <fma4intrin.h> +#endif + +#ifdef __XOP__ +#include <xopintrin.h> +#endif + +#ifdef __LWP__ +#include <lwpintrin.h> +#endif + #endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h new file mode 100644 index 00000000000..803417a6a45 --- /dev/null +++ b/gcc/config/i386/xopintrin.h @@ -0,0 +1,771 @@ +/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _XOPMMINTRIN_H_INCLUDED +#define _XOPMMINTRIN_H_INCLUDED + +#ifndef __XOP__ +# error "XOP instruction set not enabled" +#else + +#include <fma4intrin.h> + +/* Integer multiply/add intructions. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); +} + +/* Packed Integer Horizontal Add and Subtract */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); +} + +extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); +} + +/* Vector conditional move and permute */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +/* Packed Integer Rotates and Shifts + Rotates - Non-Immediate form */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rot_epi64(__m128i 
__A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); +} + +/* Rotates - Immediate form */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi8(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi16(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi32(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_roti_epi64(__m128i __A, const int __B) +{ + return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); +} +#else +#define _mm_roti_epi8(A, N) \ + ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi16(A, N) \ + ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi32(A, N) \ + ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) +#define _mm_roti_epi64(A, N) \ + ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) +#endif + +/* Shifts */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); +} + + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); +} + +/* Compare and Predicate Generation + pcom (integer, unsigned bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) 
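/* each vpcom form encodes its predicate (lt, le, gt, ge, eq, neq, false,
   true) as an immediate byte; GCC exposes one builtin per predicate */ 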
__builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); +} + +/*pcom (integer, unsigned words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); +} + +/*pcom (integer, unsigned double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return 
(__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); +} + +/*pcom (integer, unsigned quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); +} + +/*pcom (integer, signed bytes) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); +} + +/*pcom (integer, signed words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); +} + +/*pcom (integer, signed double words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); +} + +/*pcom (integer, signed quad words) */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); +} + +/* FRCZ */ + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_pd (__m128d __A) +{ + return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); +} + +extern __inline __m128 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_frcz_ss (__m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_frcz_sd (__m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B); +} + +extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_ps (__m256 __A) +{ + return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); +} + +extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_frcz_pd (__m256d __A) +{ + return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* _XOPMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h index c0914d3be02..78e3115c291 100644 --- a/gcc/config/m32c/m32c.h +++ b/gcc/config/m32c/m32c.h @@ -560,7 +560,6 @@ typedef struct m32c_cumulative_args #define HAVE_PRE_DECREMENT 1 #define HAVE_POST_INCREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) #define MAX_REGS_PER_ADDRESS 1 /* This is passed to the macros below, so that they can be implemented diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h index ee0f9f67fca..278ba15c4fa 100644 --- a/gcc/config/m68hc11/m68hc11.h +++ b/gcc/config/m68hc11/m68hc11.h @@ -1108,9 +1108,6 @@ extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER]; && (GET_CODE (XEXP (operand, 0)) == POST_INC) \ && (SP_REG_P (XEXP (XEXP (operand, 0), 0)))) -/* 1 if X is an rtx for a constant that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X)) - /* Maximum number of registers that can appear in a valid memory address */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c index 0862936b1b4..8db98fc4f46 100644 --- a/gcc/config/m68k/m68k.c +++ b/gcc/config/m68k/m68k.c @@ -1399,6 +1399,30 @@ flags_in_68881 (void) return cc_status.flags & CC_IN_68881; } +/* Return true if PARALLEL contains register REGNO. */ +static bool +m68k_reg_present_p (const_rtx parallel, unsigned int regno) +{ + int i; + + if (REG_P (parallel) && REGNO (parallel) == regno) + return true; + + if (GET_CODE (parallel) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (parallel, 0); ++i) + { + const_rtx x; + + x = XEXP (XVECEXP (parallel, 0, i), 0); + if (REG_P (x) && REGNO (x) == regno) + return true; + } + + return false; +} + /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */ static bool @@ -1411,6 +1435,26 @@ m68k_ok_for_sibcall_p (tree decl, tree exp) if (CALL_EXPR_STATIC_CHAIN (exp)) return false; + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a D0 register but then need to transfer it to an A0 register. */ + rtx cfun_value; + rtx call_value; + + cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl); + call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl); + + /* Check that the values are equal or that the result the callee + function returns is a superset of what the current function returns. 
*/ + if (!(rtx_equal_p (cfun_value, call_value) + || (REG_P (cfun_value) + && m68k_reg_present_p (call_value, REGNO (cfun_value))))) + return false; + } + kind = m68k_get_function_kind (current_function_decl); if (kind == m68k_fk_normal_function) /* We can always sibcall from a normal function, because it's @@ -5188,6 +5232,9 @@ m68k_libcall_value (enum machine_mode mode) return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG); } +/* Location in which function value is returned. + NOTE: Due to differences in ABIs, don't call this function directly, + use FUNCTION_VALUE instead. */ rtx m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED) { diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h index 8b00a444ce2..9d286e33b94 100644 --- a/gcc/config/mep/mep.h +++ b/gcc/config/mep/mep.h @@ -567,8 +567,6 @@ typedef struct #define TRAMPOLINE_SIZE 20 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/mips/iris.h b/gcc/config/mips/iris.h index fce82174e66..373691ee6e1 100644 --- a/gcc/config/mips/iris.h +++ b/gcc/config/mips/iris.h @@ -63,9 +63,6 @@ along with GCC; see the file COPYING3. If not see #undef ASM_FINISH_DECLARE_OBJECT #define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object -/* The linker needs a space after "-o". */ -#define SWITCHES_NEED_SPACES "o" - /* Specify wchar_t types. */ #undef WCHAR_TYPE #define WCHAR_TYPE (Pmode == DImode ? "int" : "long int") diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 5005bf7f0fb..76fc37bd479 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -1930,7 +1930,7 @@ (set (match_dup 0) (match_dup 5)) (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI)) - ;; Zero-extend OP4. + ;; Zero-extend OP0. (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32))) @@ -1938,7 +1938,7 @@ (lshiftrt:DI (match_dup 0) (const_int 32))) - ;; Shift OP0 into place. + ;; Shift OP4 into place. (set (match_dup 4) (ashift:DI (match_dup 4) (const_int 32))) diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h index bdbc948ac12..c732aa07180 100644 --- a/gcc/config/mn10300/mn10300.h +++ b/gcc/config/mn10300/mn10300.h @@ -600,10 +600,6 @@ struct cum_arg {int nbytes; }; ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \ : (rtx) 0) -/* 1 if X is an rtx for a constant that is a valid address. */ - -#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && GET_CODE (X) != CONST_DOUBLE) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 2 diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h index f1b77eaf0c2..384bce4a986 100644 --- a/gcc/config/moxie/moxie.h +++ b/gcc/config/moxie/moxie.h @@ -475,10 +475,6 @@ enum reg_class an immediate operand on the target machine. */ #define LEGITIMATE_CONSTANT_P(X) 1 -/* A C expression that is 1 if the RTX X is a constant which is a - valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - /* A number, the maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h index 8997612ba5a..fe8c9e8aed3 100644 --- a/gcc/config/pdp11/pdp11.h +++ b/gcc/config/pdp11/pdp11.h @@ -594,10 +594,6 @@ extern int may_call_alloca; #define MAX_REGS_PER_ADDRESS 1 -/* Recognize any constant value that is a valid address. 
*/ - -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Nonzero if the constant value X is a legitimate general operand. It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h index 44559f22333..4d0c96278e0 100644 --- a/gcc/config/picochip/picochip.h +++ b/gcc/config/picochip/picochip.h @@ -471,8 +471,6 @@ extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER]; /* Addressing Modes */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P(X) - #define MAX_REGS_PER_ADDRESS 1 /* Legitimize reload address tries machine dependent means of diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md index f15b586afb5..52bf7df3621 100644 --- a/gcc/config/rx/constraints.md +++ b/gcc/config/rx/constraints.md @@ -55,7 +55,7 @@ ;; This constraint is used by the SUBSI3 pattern because the ;; RX SUB instruction can only take a 4-bit unsigned integer -;; value. +;; value. Also used by the MVTIPL instruction. (define_constraint "Uint04" "@internal An unsigned 4-bit immediate value" (and (match_code "const_int") diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md index 75cf8ebaed8..d7a363ebb88 100644 --- a/gcc/config/rx/predicates.md +++ b/gcc/config/rx/predicates.md @@ -117,16 +117,22 @@ /* Check that the next element is the first push. */ element = XVECEXP (op, 0, 1); if ( ! SET_P (element) + || ! REG_P (SET_SRC (element)) + || GET_MODE (SET_SRC (element)) != SImode || ! MEM_P (SET_DEST (element)) - || ! REG_P (XEXP (SET_DEST (element), 0)) - || REGNO (XEXP (SET_DEST (element), 0)) != SP_REG - || ! REG_P (SET_SRC (element))) + || GET_MODE (SET_DEST (element)) != SImode + || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS + || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0)) + || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG + || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) + != GET_MODE_SIZE (SImode)) return false; src_regno = REGNO (SET_SRC (element)); /* Check that the remaining elements use SP-<disp> - addressing and incremental register numbers. */ + addressing and decreasing register numbers. */ for (i = 2; i < count; i++) { element = XVECEXP (op, 0, i); @@ -134,7 +140,7 @@ if ( ! SET_P (element) || ! REG_P (SET_SRC (element)) || GET_MODE (SET_SRC (element)) != SImode - || REGNO (SET_SRC (element)) != src_regno + (i - 1) + || REGNO (SET_SRC (element)) != src_regno - (i - 1) || ! MEM_P (SET_DEST (element)) || GET_MODE (SET_DEST (element)) != SImode || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS @@ -142,7 +148,7 @@ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1)) || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1)) - != (i - 1) * GET_MODE_SIZE (SImode)) + != i * GET_MODE_SIZE (SImode)) return false; } return true; diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index cf2b098e83c..885f52581de 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -51,6 +51,8 @@ #include "target-def.h" #include "langhooks.h" +enum rx_cpu_types rx_cpu_type = RX600; + /* Return true if OP is a reference to an object in a small data area. */ static bool @@ -249,7 +251,6 @@ rx_is_mode_dependent_addr (rtx addr) } } - /* A C compound statement to output to stdio stream FILE the assembler syntax for an instruction operand that is a memory reference whose address is ADDR. 
*/ @@ -445,8 +446,13 @@ rx_print_operand (FILE * file, rtx op, int letter) fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]); else if (CONST_INT_P (op)) { + HOST_WIDE_INT v = INTVAL (op); + fprintf (file, "#"); - rx_print_integer (file, INTVAL (op) >> 32); + /* Trickery to avoid problems with shifting 32 bits at a time. */ + v = v >> 16; + v = v >> 16; + rx_print_integer (file, v); } else { @@ -840,22 +846,20 @@ has_func_attr (const_tree decl, const char * func_attr) return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE; } -/* Returns true if the provided function has - the "[fast_]interrupt" attribute. */ +/* Returns true if the provided function has the "fast_interrupt" attribute. */ static inline bool is_fast_interrupt_func (const_tree decl) { - return has_func_attr (decl, "interrupt") - || has_func_attr (decl, "fast_interrupt") ; + return has_func_attr (decl, "fast_interrupt"); } -/* Returns true if the provided function has the "exception" attribute. */ +/* Returns true if the provided function has the "interrupt" attribute. */ static inline bool -is_exception_func (const_tree decl) +is_interrupt_func (const_tree decl) { - return has_func_attr (decl, "exception"); + return has_func_attr (decl, "interrupt"); } /* Returns true if the provided function has the "naked" attribute. */ @@ -945,8 +949,8 @@ rx_set_current_function (tree fndecl) { /* Remember the last target of rx_set_current_function. */ static tree rx_previous_fndecl; - bool prev_was_interrupt; - bool current_is_interrupt; + bool prev_was_fast_interrupt; + bool current_is_fast_interrupt; /* Only change the context if the function changes. This hook is called several times in the course of compiling a function, and we don't want @@ -954,18 +958,19 @@ rx_set_current_function (tree fndecl) if (fndecl == rx_previous_fndecl) return; - prev_was_interrupt + prev_was_fast_interrupt = rx_previous_fndecl ? is_fast_interrupt_func (rx_previous_fndecl) : false; - current_is_interrupt + + current_is_fast_interrupt = fndecl ? is_fast_interrupt_func (fndecl) : false; - if (prev_was_interrupt != current_is_interrupt) + if (prev_was_fast_interrupt != current_is_fast_interrupt) { - use_fixed_regs = current_is_interrupt; + use_fixed_regs = current_is_fast_interrupt; target_reinit (); } - + rx_previous_fndecl = fndecl; } @@ -1057,8 +1062,8 @@ rx_get_stack_layout (unsigned int * lowest, if (df_regs_ever_live_p (reg) && (! call_used_regs[reg] /* Even call clobbered registers must - be pushed inside exception handlers. */ - || is_exception_func (NULL_TREE))) + be pushed inside interrupt handlers. */ + || is_interrupt_func (NULL_TREE))) { if (low == 0) low = reg; @@ -1142,9 +1147,8 @@ rx_emit_stack_pushm (rtx * operands) gcc_assert (REG_P (first_push)); asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n", - reg_names [REGNO (first_push)], - reg_names [REGNO (first_push) + last_reg]); - + reg_names [REGNO (first_push) - last_reg], + reg_names [REGNO (first_push)]); } /* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */ @@ -1167,14 +1171,30 @@ gen_rx_store_vector (unsigned int low, unsigned int high) XVECEXP (vector, 0, i + 1) = gen_rtx_SET (SImode, gen_rtx_MEM (SImode, - i == 0 ? stack_pointer_rtx - : gen_rtx_MINUS (SImode, stack_pointer_rtx, - GEN_INT (i * UNITS_PER_WORD))), - gen_rtx_REG (SImode, low + i)); - + gen_rtx_MINUS (SImode, stack_pointer_rtx, + GEN_INT ((i + 1) * UNITS_PER_WORD))), + gen_rtx_REG (SImode, high - i)); return vector; } +/* Mark INSN as being frame related. 
If it is a PARALLEL + then mark each element as being frame related as well. */ + +static void +mark_frame_related (rtx insn) +{ + RTX_FRAME_RELATED_P (insn) = 1; + insn = PATTERN (insn); + + if (GET_CODE (insn) == PARALLEL) + { + unsigned int i; + + for (i = 0; i < XVECLEN (insn, 0); i++) + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1; + } +} + void rx_expand_prologue (void) { @@ -1183,6 +1203,7 @@ unsigned int mask; unsigned int low; unsigned int high; + unsigned int reg; rtx insn; /* Naked functions use their own, programmer provided prologues. */ @@ -1196,14 +1217,12 @@ /* If we use any of the callee-saved registers, save them now. */ if (mask) { - unsigned int reg; - /* Push registers in reverse order. */ for (reg = FIRST_PSEUDO_REGISTER; reg --;) if (mask & (1 << reg)) { insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); } } else if (low) @@ -1214,7 +1233,57 @@ insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD), gen_rx_store_vector (low, high))); - RTX_FRAME_RELATED_P (insn) = 1; + mark_frame_related (insn); + } + + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) + { + unsigned int acc_high, acc_low; + + /* Interrupt handlers have to preserve the accumulator + register if so requested by the user. Use the first + two pushed registers as intermediaries. */ + if (mask) + { + acc_low = acc_high = 0; + + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high != 0); + + /* Note - the bottom 16 bits of the accumulator are inaccessible. + We just assume that they are zero. */ + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high))); + } + else + { + acc_low = low; + acc_high = low + 1; + + /* We have assumed that there are at least two registers pushed... */ + gcc_assert (acc_high <= high); + + emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_store_vector (acc_low, acc_high))); + } + + frame_size += 2 * UNITS_PER_WORD; } /* If needed, set up the frame pointer. */ @@ -1270,8 +1339,8 @@ rx_output_function_prologue (FILE * file, if (is_fast_interrupt_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n"); - if (is_exception_func (NULL_TREE)) - asm_fprintf (file, "\t; Note: Exception Handler\n"); + if (is_interrupt_func (NULL_TREE)) + asm_fprintf (file, "\t; Note: Interrupt Handler\n"); if (is_naked_func (NULL_TREE)) asm_fprintf (file, "\t; Note: Naked Function\n"); @@ -1382,6 +1451,7 @@ rx_expand_epilogue (bool is_sibcall) unsigned int stack_size; unsigned int register_mask; unsigned int regs_size; + unsigned int reg; unsigned HOST_WIDE_INT total_size; if (is_naked_func (NULL_TREE)) @@ -1407,14 +1477,14 @@ their caller. Instead they branch to their sibling and allow their return instruction to return to this function's parent. 
- - Fast interrupt and exception handling functions have to use special + - Fast and normal interrupt handling functions have to use special return instructions. - Functions where we have pushed a fragmented set of registers into the call-save area must have the same set of registers popped. */ if (is_sibcall || is_fast_interrupt_func (NULL_TREE) - || is_exception_func (NULL_TREE) + || is_interrupt_func (NULL_TREE) || register_mask) { /* Cannot use the special instructions - deconstruct by hand. */ @@ -1422,10 +1492,47 @@ rx_expand_epilogue (bool is_sibcall) emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (total_size))); - if (register_mask) + if (is_interrupt_func (NULL_TREE) && TARGET_SAVE_ACC_REGISTER) { - unsigned int reg; + unsigned int acc_low, acc_high; + + /* Reverse the saving of the accumulator register onto the stack. + Note we must adjust the saved "low" accumulator value as it + is really the middle 32-bits of the accumulator. */ + if (register_mask) + { + acc_low = acc_high = 0; + for (reg = 1; reg < FIRST_PSEUDO_REGISTER; reg ++) + if (register_mask & (1 << reg)) + { + if (acc_low == 0) + acc_low = reg; + else + { + acc_high = reg; + break; + } + } + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high))); + emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low))); + } + else + { + acc_low = low; + acc_high = low + 1; + emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD), + gen_rx_popm_vector (acc_low, acc_high))); + } + + emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low), + gen_rtx_REG (SImode, acc_low), + GEN_INT (16))); + emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low))); + emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high))); + } + if (register_mask) + { for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg ++) if (register_mask & (1 << reg)) emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg))); @@ -1441,7 +1548,7 @@ rx_expand_epilogue (bool is_sibcall) if (is_fast_interrupt_func (NULL_TREE)) emit_jump_insn (gen_fast_interrupt_return ()); - else if (is_exception_func (NULL_TREE)) + else if (is_interrupt_func (NULL_TREE)) emit_jump_insn (gen_exception_return ()); else if (! is_sibcall) emit_jump_insn (gen_simple_return ()); @@ -1670,6 +1777,7 @@ enum rx_builtin RX_BUILTIN_MVTACHI, RX_BUILTIN_MVTACLO, RX_BUILTIN_MVTC, + RX_BUILTIN_MVTIPL, RX_BUILTIN_RACW, RX_BUILTIN_REVW, RX_BUILTIN_RMPA, @@ -1725,6 +1833,7 @@ rx_init_builtins (void) ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void); ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer); ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer); + ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer); ADD_RX_BUILTIN1 (RACW, "racw", void, integer); ADD_RX_BUILTIN1 (ROUND, "round", intSI, float); ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI); @@ -1733,20 +1842,6 @@ rx_init_builtins (void) } static rtx -rx_expand_builtin_stz (rtx arg, rtx target, rtx (* gen_func)(rtx, rtx)) -{ - if (! CONST_INT_P (arg)) - return NULL_RTX; - - if (target == NULL_RTX || ! REG_P (target)) - target = gen_reg_rtx (SImode); - - emit_insn (gen_func (target, arg)); - - return target; -} - -static rtx rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg) { if (reg && ! REG_P (arg)) @@ -1791,6 +1886,21 @@ rx_expand_builtin_mvfc (tree t_arg, rtx target) } static rtx +rx_expand_builtin_mvtipl (rtx arg) +{ + /* The RX610 does not support the MVTIPL instruction. */ + if (rx_cpu_type == RX610) + return NULL_RTX; + + if (! CONST_INT_P (arg) || ! 
IN_RANGE (arg, 0, (1 << 4) - 1)) + return NULL_RTX; + + emit_insn (gen_mvtipl (arg)); + + return NULL_RTX; +} + +static rtx rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx)) { rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0)); @@ -1887,6 +1997,7 @@ rx_expand_builtin (tree exp, case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX; case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target); case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp); + case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op); case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg (op, gen_racw, false); case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target); @@ -1945,7 +2056,7 @@ rx_elf_asm_destructor (rtx symbol, int priority) rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false); } -/* Check "interrupt", "exception" and "naked" attributes. */ +/* Check "fast_interrupt", "interrupt" and "naked" attributes. */ static tree rx_handle_func_attribute (tree * node, @@ -1975,9 +2086,8 @@ rx_handle_func_attribute (tree * node, const struct attribute_spec rx_attribute_table[] = { /* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */ - { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, - { "exception", 0, 0, true, false, false, rx_handle_func_attribute }, + { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute }, { "naked", 0, 0, true, false, false, rx_handle_func_attribute }, { NULL, 0, 0, false, false, false, NULL } }; @@ -1993,7 +2103,7 @@ static bool rx_func_attr_inlinable (const_tree decl) { return ! is_fast_interrupt_func (decl) - && ! is_exception_func (decl) + && ! is_interrupt_func (decl) && ! is_naked_func (decl); } @@ -2115,6 +2225,20 @@ rx_is_legitimate_constant (rtx x) ( 1 << (rx_max_constant_size * 8))); } +/* This is a tri-state variable. The default value of 0 means that the user + has specified neither -mfpu nor -mnofpu on the command line. In this case + the selection of RX FPU instructions is entirely based upon the size of + the floating point object and whether unsafe math optimizations were + enabled. If 32-bit doubles have been enabled then both floats and doubles + can make use of FPU instructions, otherwise only floats may do so. + + If the value is 1 then the user has specified -mfpu and the FPU + instructions should be used. Unsafe math optimizations will automatically + be enabled and doubles set to 32 bits. If the value is -1 then -mnofpu + has been specified and FPU instructions will not be used, even if unsafe + math optimizations have been enabled. */ +int rx_enable_fpu = 0; + /* Extra processing for target specific command line options. */ static bool @@ -2122,6 +2246,27 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) { switch (code) { + /* -mfpu enables the use of RX FPU instructions. This implies the use + of 32-bit doubles and also the enabling of fast math optimizations + (since the RX FPU instructions are not IEEE compliant). The -mnofpu + option disables the use of RX FPU instructions, but does not + place any constraints on the size of doubles or the use of fast math + optimizations. + + The selection of 32-bit vs 64-bit doubles is handled by the setting + of the 32BIT_DOUBLES mask in the rx.opt file. 
Enabling fast math + optimizations is performed in OVERRIDE_OPTIONS since if it was done + here it could be overridden by a -fno-fast-math option specified + *earlier* on the command line. (Target specific options are + processed before generic ones). */ + case OPT_fpu: + rx_enable_fpu = 1; + break; + + case OPT_nofpu: + rx_enable_fpu = -1; + break; + case OPT_mint_register_: switch (value) { @@ -2145,12 +2290,21 @@ rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value) break; case OPT_mmax_constant_size_: - /* Make sure that the the -mmax-constant_size option is in range. */ + /* Make sure that the -mmax-constant_size option is in range. */ return IN_RANGE (value, 0, 4); + case OPT_mcpu_: + case OPT_patch_: + if (strcasecmp (arg, "RX610") == 0) + rx_cpu_type = RX610; + /* FIXME: Should we check for non-RX cpu names here ? */ + break; + default: - return true; + break; } + + return true; } static int diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index a01e194910b..bb7cf7f1e3e 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -24,18 +24,24 @@ { \ builtin_define ("__RX__"); \ builtin_assert ("cpu=RX"); \ - builtin_assert ("machine=RX"); \ + if (rx_cpu_type == RX610) \ + builtin_assert ("machine=RX610"); \ + else \ + builtin_assert ("machine=RX600"); \ \ if (TARGET_BIG_ENDIAN_DATA) \ builtin_define ("__RX_BIG_ENDIAN__"); \ else \ builtin_define ("__RX_LITTLE_ENDIAN__");\ \ - if (TARGET_64BIT_DOUBLES) \ - builtin_define ("__RX_64BIT_DOUBLES__");\ - else \ + if (TARGET_32BIT_DOUBLES) \ builtin_define ("__RX_32BIT_DOUBLES__");\ + else \ + builtin_define ("__RX_64BIT_DOUBLES__");\ \ + if (ALLOW_RX_FPU_INSNS) \ + builtin_define ("__RX_FPU_INSNS__"); \ + \ if (TARGET_AS100_SYNTAX) \ builtin_define ("__RX_AS100_SYNTAX__"); \ else \ @@ -43,6 +49,17 @@ } \ while (0) +enum rx_cpu_types +{ + RX600, + RX610 +}; + +extern enum rx_cpu_types rx_cpu_type; + +#undef CC1_SPEC +#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" + #undef STARTFILE_SPEC #define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s" @@ -52,7 +69,8 @@ #undef ASM_SPEC #define ASM_SPEC "\ %{mbig-endian-data:-mbig-endian-data} \ -%{m64bit-doubles:-m64bit-doubles} \ +%{m32bit-doubles:-m32bit-doubles} \ +%{!m32bit-doubles:-m64bit-doubles} \ %{msmall-data-limit*:-msmall-data-limit} \ %{mrelax:-relax} \ " @@ -88,16 +106,17 @@ #define LONG_LONG_TYPE_SIZE 64 #define FLOAT_TYPE_SIZE 32 -#define DOUBLE_TYPE_SIZE (TARGET_64BIT_DOUBLES ? 64 : 32) +#define DOUBLE_TYPE_SIZE (TARGET_32BIT_DOUBLES ? 32 : 64) #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE -#ifdef __RX_64BIT_DOUBLES__ -#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_DOUBLE_TYPE_SIZE 64 -#define LIBGCC2_HAS_DF_MODE 1 -#else +#ifdef __RX_32BIT_DOUBLES__ +#define LIBGCC2_HAS_DF_MODE 0 #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32 #define LIBGCC2_DOUBLE_TYPE_SIZE 32 +#else +#define LIBGCC2_HAS_DF_MODE 1 +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#define LIBGCC2_DOUBLE_TYPE_SIZE 64 #endif #define DEFAULT_SIGNED_CHAR 0 @@ -591,7 +610,6 @@ typedef unsigned int CUMULATIVE_ARGS; #define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ rx_print_operand_address (FILE, ADDR) - #define CC_NO_CARRY 0400 #define NOTICE_UPDATE_CC(EXP, INSN) rx_notice_update_cc (EXP, INSN) @@ -614,19 +632,28 @@ extern int rx_float_compare_mode; #define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \ ? 
DBX_DEBUG : DWARF2_DEBUG) -#undef CC1_SPEC -#define CC1_SPEC "%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}}" +#define INCOMING_FRAME_SP_OFFSET 4 +#define ARG_POINTER_CFA_OFFSET(FNDECL) 4 +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4 + +extern int rx_enable_fpu; /* For some unknown reason LTO compression is not working, at least on my local system. So set the default compression - level to none, for now. */ + level to none, for now. + + For an explanation of rx_enable_fpu see rx_handle_option(). */ #define OVERRIDE_OPTIONS \ do \ { \ if (flag_lto_compression_level == -1) \ flag_lto_compression_level = 0; \ + \ + if (rx_enable_fpu == 1) \ + set_fast_math_flags (true); \ } \ while (0) /* This macro is used to decide when RX FPU instructions can be used. */ -#define ALLOW_RX_FPU_INSNS flag_unsafe_math_optimizations +#define ALLOW_RX_FPU_INSNS ((rx_enable_fpu != -1) \ + && flag_unsafe_math_optimizations) diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 165da4f41a1..360f6235558 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -27,8 +27,8 @@ ;; This code iterator is used for sign- and zero- extensions. (define_mode_iterator small_int_modes [(HI "") (QI "")]) -;; We do not handle DFmode here because by default it is -;; the same as SFmode, and if -m64bit-doubles is active +;; We do not handle DFmode here because it is either +;; the same as SFmode, or if -m64bit-doubles is active ;; then all operations on doubles have to be handled by ;; library functions. (define_mode_iterator register_modes @@ -75,15 +75,14 @@ (UNSPEC_BUILTIN_MVTACHI 41) (UNSPEC_BUILTIN_MVTACLO 42) (UNSPEC_BUILTIN_MVTC 43) - (UNSPEC_BUILTIN_MVTCP 44) - (UNSPEC_BUILTIN_OPEPC 45) - (UNSPEC_BUILTIN_RACW 46) - (UNSPEC_BUILTIN_REVW 47) - (UNSPEC_BUILTIN_RMPA 48) - (UNSPEC_BUILTIN_ROUND 49) - (UNSPEC_BUILTIN_SAT 50) - (UNSPEC_BUILTIN_SETPSW 51) - (UNSPEC_BUILTIN_WAIT 52) + (UNSPEC_BUILTIN_MVTIPL 44) + (UNSPEC_BUILTIN_RACW 45) + (UNSPEC_BUILTIN_REVW 46) + (UNSPEC_BUILTIN_RMPA 47) + (UNSPEC_BUILTIN_ROUND 48) + (UNSPEC_BUILTIN_SAT 49) + (UNSPEC_BUILTIN_SETPSW 50) + (UNSPEC_BUILTIN_WAIT 51) ] ) @@ -1002,10 +1001,8 @@ (set_attr "timings" "11,11,11,11,11,33") (set_attr "length" "3,4,5,6,7,6")] ) - + ;; Floating Point Instructions -;; These patterns are only enabled with -ffast-math because the RX FPU -;; cannot handle sub-normal values. (define_insn "addsf3" [(set (match_operand:SF 0 "register_operand" "=r,r,r") @@ -1298,7 +1295,6 @@ [(set_attr "length" "3,6") (set_attr "timings" "22")] ) - ;; Block move functions. 
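
To make the -mfpu/-mnofpu machinery introduced above easier to follow, here is a minimal C sketch of the gating logic, written for illustration only. The helper name allow_rx_fpu_insns_p is invented; in the patch itself the test lives in the ALLOW_RX_FPU_INSNS macro in rx.h, and the tri-state rx_enable_fpu is set in rx_handle_option in rx.c.

/* Sketch of the -mfpu/-mnofpu decision logic (illustrative only).
   enable_fpu: 1 if -mfpu was given (which also enables fast math via
   OVERRIDE_OPTIONS), -1 if -mnofpu was given, 0 if neither.  */
static int
allow_rx_fpu_insns_p (int enable_fpu, int unsafe_math)
{
  /* -mnofpu wins outright; otherwise FPU instructions additionally
     require unsafe math optimizations, because the RX FPU is not
     IEEE compliant.  */
  return enable_fpu != -1 && unsafe_math;
}
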
@@ -1580,8 +1576,8 @@ ;; Move to Accumulator (high) (define_insn "mvtachi" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACHI)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACHI)] "" "mvtachi\t%0" [(set_attr "length" "3")] @@ -1589,8 +1585,8 @@ ;; Move to Accumulator (low) (define_insn "mvtaclo" - [(unspec:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPEC_BUILTIN_MVTACLO)] + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_BUILTIN_MVTACLO)] "" "mvtaclo\t%0" [(set_attr "length" "3")] @@ -1598,8 +1594,8 @@ ;; Round Accumulator (define_insn "racw" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i")] - UNSPEC_BUILTIN_RACW)] + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")] + UNSPEC_BUILTIN_RACW)] "" "racw\t%0" [(set_attr "length" "3")] @@ -1679,7 +1675,7 @@ ;; Move from control register (define_insn "mvfc" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_BUILTIN_MVFC))] "" @@ -1691,13 +1687,24 @@ (define_insn "mvtc" [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") (match_operand:SI 1 "nonmemory_operand" "r,i")] - UNSPEC_BUILTIN_MVTC) - (clobber (cc0))] + UNSPEC_BUILTIN_MVTC)] "" "mvtc\t%1, %C0" - [(set_attr "length" "3,7") - (set_attr "cc" "clobber")] ;; Just in case the control - ;; register selected is the psw. + [(set_attr "length" "3,7")] + ;; Ignore possible clobbering of the comparison flags in the + ;; PSW register. This is a cc0 target so any cc0 setting + ;; instruction will always be paired with a cc0 user, without + ;; the possibility of this instruction being placed in between + ;; them. +) + +;; Move to interrupt priority level +(define_insn "mvtipl" + [(unspec:SI [(match_operand:SI 0 "immediate_operand" "Uint04")] + UNSPEC_BUILTIN_MVTIPL)] + "" + "mvtipl\t%0" + [(set_attr "length" "3")] ) ;;---------- Interrupts ------------------------ @@ -1748,27 +1755,6 @@ [(set_attr "length" "5")] ) -;; Move to co-processor register -(define_insn "mvtcp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i") - (match_operand:SI 1 "nonmemory_operand" "i,r") - (match_operand:SI 2 "immediate_operand" "i,i")] - UNSPEC_BUILTIN_MVTCP)] - "" - "; mvtcp\t%0, %1, %2" - [(set_attr "length" "7,5")] -) - -;; Co-processor operation -(define_insn "opecp" - [(unspec:SI [(match_operand:SI 0 "immediate_operand" "i") - (match_operand:SI 1 "immediate_operand" "i")] - UNSPEC_BUILTIN_OPEPC)] - "" - "; opecp\t%0, %1" - [(set_attr "length" "5")] -) - ;;---------- Misc ------------------------ ;; Required by cfglayout.c... diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt index 83e75bfba76..768d565b478 100644 --- a/gcc/config/rx/rx.opt +++ b/gcc/config/rx/rx.opt @@ -19,13 +19,31 @@ ; <http://www.gnu.org/licenses/>. ;--------------------------------------------------- +m32bit-doubles +Target RejectNegative Mask(32BIT_DOUBLES) +Stores doubles in 32 bits. + m64bit-doubles -Target RejectNegative Mask(64BIT_DOUBLES) -Store doubles in 64 bits. +Target RejectNegative InverseMask(32BIT_DOUBLES) +Store doubles in 64 bits. This is the default. -m32bit-doubles -Target RejectNegative InverseMask(64BIT_DOUBLES) -Stores doubles in 32 bits. This is the default. +fpu +Target RejectNegative Mask(32BIT_DOUBLES) MaskExists +Enable the use of RX FPU instructions. 
+ +nofpu +Target RejectNegative InverseMask(32BIT_DOUBLES) MaskExists +Disable the use of RX FPU instructions. + +;--------------------------------------------------- + +mcpu= +Target RejectNegative Joined Var(rx_cpu_name) +Specify the target RX cpu type. + +patch= +Target RejectNegative Joined Var(rx_cpu_name) +Alias for -mcpu. ;--------------------------------------------------- @@ -72,3 +90,9 @@ Maximum size in bytes of constant values allowed as operands. mint-register= Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0) Specifies the number of registers to reserve for interrupt handlers. + +;--------------------------------------------------- + +msave-acc-in-interrupts +Target Mask(SAVE_ACC_REGISTER) +Specifies whether interrupt functions should save and restore the accumulator register. diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx index 39cda72af57..eb1ca48d3a3 100644 --- a/gcc/config/rx/t-rx +++ b/gcc/config/rx/t-rx @@ -20,9 +20,9 @@ # Enable multilibs: -MULTILIB_OPTIONS = m64bit-doubles mbig-endian-data -MULTILIB_DIRNAMES = 64fp big-endian-data -MULTILIB_MATCHES = m64bit-doubles=mieee +MULTILIB_OPTIONS = m32bit-doubles mbig-endian-data +MULTILIB_DIRNAMES = 32fp big-endian-data +MULTILIB_MATCHES = m32bit-doubles=fpu MULTILIB_EXCEPTIONS = MULTILIB_EXTRA_OPTS = diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h index 0b7af7b2739..cde9c222546 100644 --- a/gcc/config/score/score.h +++ b/gcc/config/score/score.h @@ -688,9 +688,6 @@ typedef struct score_args #define HAVE_PRE_MODIFY_REG 0 #define HAVE_POST_MODIFY_REG 0 -/* Recognize any constant value that is a valid address. */ -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - /* Maximum number of registers that can appear in a valid memory address. */ #define MAX_REGS_PER_ADDRESS 1 diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h index 682f7e6f466..fa97e8becdc 100644 --- a/gcc/config/stormy16/stormy16.h +++ b/gcc/config/stormy16/stormy16.h @@ -522,8 +522,6 @@ enum reg_class #define HAVE_PRE_DECREMENT 1 -#define CONSTANT_ADDRESS_P(X) CONSTANT_P (X) - #define MAX_REGS_PER_ADDRESS 1 #ifdef REG_OK_STRICT diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h index 1087069adbb..dccbe9cc8ee 100644 --- a/gcc/config/vax/linux.h +++ b/gcc/config/vax/linux.h @@ -21,17 +21,7 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_VERSION #define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)"); -#define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ - LINUX_TARGET_OS_CPP_BUILTINS(); \ - if (flag_pic) \ - { \ - builtin_define ("__PIC__"); \ - builtin_define ("__pic__"); \ - } \ - } \ - while (0) +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() /* We use GAS, G-float double and want new DI patterns. */ #undef TARGET_DEFAULT |
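
A note on the "trickery" comment in rx_print_operand above: shifting right by 32 in a single step is undefined behaviour in C whenever the operand type is only 32 bits wide, which is presumably why the patch splits the shift into two 16-bit steps whose shift counts are always in range. A standalone illustration (not code from the patch):

#include <stdio.h>

static long
shift_down_32 (long v)          /* `long` may be 32 or 64 bits wide */
{
  /* Two 16-bit shifts have the combined effect of one 32-bit shift,
     but each individual shift count is valid for any width >= 32.  */
  v = v >> 16;
  v = v >> 16;
  return v;
}

int
main (void)
{
  printf ("%ld\n", shift_down_32 (0x12345678L));   /* prints 0 */
  return 0;
}
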
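
Finally, the RX attribute rework above (rx_attribute_table now accepts "fast_interrupt", "interrupt" and "naked") changes how handler functions are declared. A hedged usage sketch follows; the handler names and bodies are invented, but the attribute spellings and the behaviour described in the comments come from the rx.c changes in this patch:

/* A normal interrupt handler: the epilogue returns through
   gen_exception_return, call-clobbered registers are preserved, and
   with -msave-acc-in-interrupts the accumulator is saved and restored
   via the mvfachi/mvfacmi and mvtachi/mvtaclo sequences added here.  */
void __attribute__((interrupt)) timer_tick_handler (void)
{
  /* ... handler body ... */
}

/* A fast interrupt handler: returns through gen_fast_interrupt_return
   and switches to fixed registers while it is the current function
   (see rx_set_current_function).  */
void __attribute__((fast_interrupt)) dma_done_handler (void)
{
  /* ... handler body ... */
}
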