From f733cf303bcdc952c92b81dd62199a40a1f555ec Mon Sep 17 00:00:00 2001
From: Lorry Tar Creator
Date: Mon, 22 Aug 2016 10:27:46 +0000
Subject: gcc-6.2.0

---
 gcc/config/aarch64/aarch64-builtins.c | 29 +-
 gcc/config/aarch64/aarch64-simd-builtins.def | 8 +-
 gcc/config/aarch64/aarch64-simd.md | 10 -
 gcc/config/aarch64/aarch64.c | 15 +-
 gcc/config/aarch64/aarch64.h | 5 +
 gcc/config/aarch64/arm_neon.h | 12 +-
 gcc/config/alpha/alpha.md | 5 +-
 gcc/config/arm/arm-builtins.c | 6 +-
 gcc/config/arm/arm.c | 37 +-
 gcc/config/arm/freebsd.h | 4 +
 gcc/config/avr/avr.c | 87 +-
 gcc/config/avr/avr.h | 2 +-
 gcc/config/avr/avr.md | 20 +-
 gcc/config/avr/gen-avr-mmcu-specs.c | 7 +-
 gcc/config/darwin.h | 1 +
 gcc/config/i386/avx512bwintrin.h | 44 +-
 gcc/config/i386/avx512dqintrin.h | 39 +-
 gcc/config/i386/avx512fintrin.h | 261 ++-
 gcc/config/i386/avx512ifmaintrin.h | 4 +-
 gcc/config/i386/avx512ifmavlintrin.h | 8 +-
 gcc/config/i386/avx512vbmiintrin.h | 7 +-
 gcc/config/i386/avx512vbmivlintrin.h | 6 +-
 gcc/config/i386/avx512vlbwintrin.h | 70 +-
 gcc/config/i386/avx512vldqintrin.h | 27 +-
 gcc/config/i386/avx512vlintrin.h | 97 +-
 gcc/config/i386/constraints.md | 6 +-
 gcc/config/i386/driver-i386.c | 73 +-
 gcc/config/i386/i386-builtin-types.def | 4 +
 gcc/config/i386/i386.c | 132 +-
 gcc/config/i386/i386.h | 2 +
 gcc/config/i386/i386.md | 65 +-
 gcc/config/i386/predicates.md | 2 +-
 gcc/config/i386/sse.md | 58 +-
 gcc/config/microblaze/rtems.h | 7 +
 gcc/config/nvptx/nvptx.c | 3 +-
 gcc/config/pa/pa.c | 103 +-
 gcc/config/pa/pa.md | 164 +++
 gcc/config/rl78/rl78-expand.md | 2 +-
 gcc/config/rl78/rl78-real.md | 4 +-
 gcc/config/rl78/rl78-virt.md | 4 +-
 gcc/config/rs6000/40x.md | 2 +-
 gcc/config/rs6000/440.md | 2 +-
 gcc/config/rs6000/476.md | 2 +-
 gcc/config/rs6000/601.md | 2 +-
 gcc/config/rs6000/603.md | 2 +-
 gcc/config/rs6000/6xx.md | 2 +-
 gcc/config/rs6000/7450.md | 6 +-
 gcc/config/rs6000/7xx.md | 4 +-
 gcc/config/rs6000/8540.md | 4 +-
 gcc/config/rs6000/a2.md | 2 +-
 gcc/config/rs6000/altivec.h | 29 +-
 gcc/config/rs6000/altivec.md | 222 ++-
 gcc/config/rs6000/cell.md | 6 +-
 gcc/config/rs6000/constraints.md | 28 +
 gcc/config/rs6000/crypto.md | 2 +-
 gcc/config/rs6000/dfp.md | 101 +-
 gcc/config/rs6000/e300c2c3.md | 2 +-
 gcc/config/rs6000/e6500.md | 2 +-
 gcc/config/rs6000/htm.md | 14 +-
 gcc/config/rs6000/mpc.md | 2 +-
 gcc/config/rs6000/power4.md | 6 +-
 gcc/config/rs6000/power5.md | 2 +-
 gcc/config/rs6000/power6.md | 6 +-
 gcc/config/rs6000/power7.md | 4 +-
 gcc/config/rs6000/power8.md | 5 +-
 gcc/config/rs6000/power9.md | 477 +++++++
 gcc/config/rs6000/predicates.md | 110 +-
 gcc/config/rs6000/rs6000-builtin.def | 295 +++-
 gcc/config/rs6000/rs6000-c.c | 234 ++++
 gcc/config/rs6000/rs6000-cpus.def | 22 +-
 gcc/config/rs6000/rs6000-protos.h | 4 +
 gcc/config/rs6000/rs6000.c | 1950 ++++++++++++++++++++++----
 gcc/config/rs6000/rs6000.h | 69 +-
 gcc/config/rs6000/rs6000.md | 505 ++++---
 gcc/config/rs6000/rs6000.opt | 22 +-
 gcc/config/rs6000/rs64.md | 2 +-
 gcc/config/rs6000/sysv4.h | 17 +-
 gcc/config/rs6000/t-rs6000 | 1 +
 gcc/config/rs6000/titan.md | 2 +-
 gcc/config/rs6000/vector.md | 19 +
 gcc/config/rs6000/vsx.md | 501 +++++--
 gcc/config/rs6000/xfpu.md | 6 +-
 gcc/config/rtems.h | 2 +-
 gcc/config/s390/s390-builtin-types.def | 423 +++---
 gcc/config/s390/s390-builtins.def | 69 +-
 gcc/config/s390/s390.c | 61 +-
 gcc/config/s390/s390.md | 17 +-
 gcc/config/s390/vecintrin.h | 18 +-
 gcc/config/s390/vx-builtins.md | 113 +-
 gcc/config/sparc/driver-sparc.c | 2 +
 gcc/config/sparc/linux64.h | 26 +-
 gcc/config/sparc/niagara4.md | 7 +
 gcc/config/sparc/niagara7.md | 136 ++
 gcc/config/sparc/sol2.h | 40 +-
 gcc/config/sparc/sparc-c.c | 7 +-
 gcc/config/sparc/sparc-opts.h | 1 +
 gcc/config/sparc/sparc.c | 232 ++-
 gcc/config/sparc/sparc.h | 16 +-
 gcc/config/sparc/sparc.md | 164 ++-
 gcc/config/sparc/sparc.opt | 7 +
 gcc/config/sparc/visintrin.h | 186 +++
 gcc/config/visium/visium-protos.h | 3 +-
 gcc/config/visium/visium.c | 107 +-
 gcc/config/visium/visium.md | 98 +-
 104 files changed, 5984 insertions(+), 1886 deletions(-)
 create mode 100644 gcc/config/rs6000/power9.md
 create mode 100644 gcc/config/sparc/niagara7.md

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 5573903fe0..2c4eac08e4 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -434,13 +434,15 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = { }; #undef ENTRY -/* This type is not SIMD-specific; it is the user-visible __fp16. */ -static tree aarch64_fp16_type_node = NULL_TREE; - static tree aarch64_simd_intOI_type_node = NULL_TREE; static tree aarch64_simd_intCI_type_node = NULL_TREE; static tree aarch64_simd_intXI_type_node = NULL_TREE; +/* The user-visible __fp16 type, and a pointer to that type. Used + across the back-end. */ +tree aarch64_fp16_type_node = NULL_TREE; +tree aarch64_fp16_ptr_type_node = NULL_TREE; + static const char * aarch64_mangle_builtin_scalar_type (const_tree type) { @@ -874,6 +876,21 @@ aarch64_init_builtin_rsqrt (void) } } +/* Initialize the backend types that support the user-visible __fp16 + type, also initialize a pointer to that type, to be used when + forming HFAs. */ + +static void +aarch64_init_fp16_types (void) +{ + aarch64_fp16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (aarch64_fp16_type_node) = 16; + layout_type (aarch64_fp16_type_node); + + (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); + aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); +} + void aarch64_init_builtins (void) { @@ -895,11 +912,7 @@ aarch64_init_builtins (void) = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); - aarch64_fp16_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (aarch64_fp16_type_node) = 16; - layout_type (aarch64_fp16_type_node); - - (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); + aarch64_init_fp16_types (); if (TARGET_SIMD) aarch64_init_simd_builtins (); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index dd045792b2..f440907aed 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -244,13 +244,17 @@ /* Implemented by 3. smax variants map to fmaxnm, smax_nan variants map to fmax. */ - BUILTIN_VDQIF (BINOP, smax, 3) - BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, smax, 3) + BUILTIN_VDQ_BHSI (BINOP, smin, 3) BUILTIN_VDQ_BHSI (BINOP, umax, 3) BUILTIN_VDQ_BHSI (BINOP, umin, 3) BUILTIN_VDQF (BINOP, smax_nan, 3) BUILTIN_VDQF (BINOP, smin_nan, 3) + /* Implemented by 3. */ + BUILTIN_VDQF (BINOP, fmax, 3) + BUILTIN_VDQF (BINOP, fmin, 3) + /* Implemented by aarch64_p.
*/ BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) BUILTIN_VDQ_BHSI (BINOP, sminp, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bd73bce644..ded8bff097 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1919,16 +1919,6 @@ } ) -(define_insn "aarch64_vmls" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") - (mult:VDQF (match_operand:VDQF 2 "register_operand" "w") - (match_operand:VDQF 3 "register_operand" "w"))))] - "TARGET_SIMD" - "fmls\\t%0., %2., %3." - [(set_attr "type" "neon_fp_mla__scalar")] -) - ;; FP Max/Min ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An ;; expression like: diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 999549426e..199147adb9 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9771,15 +9771,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, field_t = long_double_type_node; field_ptr_t = long_double_ptr_type_node; break; -/* The half precision and quad precision are not fully supported yet. Enable - the following code after the support is complete. Need to find the correct - type node for __fp16 *. */ -#if 0 case HFmode: - field_t = float_type_node; - field_ptr_t = float_ptr_type_node; + field_t = aarch64_fp16_type_node; + field_ptr_t = aarch64_fp16_ptr_type_node; break; -#endif case V2SImode: case V4SImode: { @@ -9934,7 +9929,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) { case REAL_TYPE: mode = TYPE_MODE (type); - if (mode != DFmode && mode != SFmode && mode != TFmode) + if (mode != DFmode && mode != SFmode + && mode != TFmode && mode != HFmode) return -1; if (*modep == VOIDmode) @@ -9947,7 +9943,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) case COMPLEX_TYPE: mode = TYPE_MODE (TREE_TYPE (type)); - if (mode != DFmode && mode != SFmode && mode != TFmode) + if (mode != DFmode && mode != SFmode + && mode != TFmode && mode != HFmode) return -1; if (*modep == VOIDmode) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 15d7e4019a..b9bf9793e4 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -930,4 +930,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define ASM_OUTPUT_POOL_EPILOGUE aarch64_asm_output_pool_epilogue +/* This type is the user-visible __fp16, and a pointer to that type. We + need it in many places in the backend. Defined in aarch64-builtins.c. 
*/ +extern tree aarch64_fp16_type_node; +extern tree aarch64_fp16_ptr_type_node; + #endif /* GCC_AARCH64_H */ diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 2612a32571..ec543684ed 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -17856,19 +17856,19 @@ vpminnms_f32 (float32x2_t a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmaxnm_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smaxv2sf (__a, __b); + return __builtin_aarch64_fmaxv2sf (__a, __b); } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smaxv4sf (__a, __b); + return __builtin_aarch64_fmaxv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smaxv2df (__a, __b); + return __builtin_aarch64_fmaxv2df (__a, __b); } /* vmaxv */ @@ -18086,19 +18086,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vminnm_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_sminv2sf (__a, __b); + return __builtin_aarch64_fminv2sf (__a, __b); } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vminnmq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_sminv4sf (__a, __b); + return __builtin_aarch64_fminv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vminnmq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_sminv2df (__a, __b); + return __builtin_aarch64_fminv2df (__a, __b); } /* vminv */ diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 932608b0fe..3e4594bf4c 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -3738,7 +3738,8 @@ ;; BUGCHK is documented common to OSF/1 and VMS PALcode. (define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] + [(trap_if (const_int 1) (const_int 0)) + (use (reg:DI 29))] "" "call_pal 0x81" [(set_attr "type" "callpal")]) @@ -5157,7 +5158,7 @@ "TARGET_ABI_OSF" { if (TARGET_EXPLICIT_RELOCS) - return "ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"; + return "#"; else return "ldgp $29,0($26)"; } diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 90fb40fed2..68b2839879 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -2861,6 +2861,10 @@ arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) int in_n, out_n; bool out_unsigned_p = TYPE_UNSIGNED (type_out); + /* Can't provide any vectorized builtins when we can't use NEON. */ + if (!TARGET_NEON) + return NULL_TREE; + if (TREE_CODE (type_out) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; @@ -2875,7 +2879,7 @@ arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) NULL_TREE is returned if no such builtin is available. 
*/ #undef ARM_CHECK_BUILTIN_MODE #define ARM_CHECK_BUILTIN_MODE(C) \ - (TARGET_NEON && TARGET_FPU_ARMV8 \ + (TARGET_FPU_ARMV8 \ && flag_unsafe_math_optimizations \ && ARM_CHECK_BUILTIN_MODE_1 (C)) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 5974c65d31..24b204a1d4 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -6704,7 +6704,7 @@ arm_function_ok_for_sibcall (tree decl, tree exp) /* The PIC register is live on entry to VxWorks PLT entries, so we must make the call before restoring the PIC register. */ - if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) + if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl)) return false; /* If we are interworking and the function is not declared static @@ -17755,6 +17755,7 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, int num_saves = XVECLEN (operands[0], 0); unsigned int regno; unsigned int regno_base = REGNO (operands[1]); + bool interrupt_p = IS_INTERRUPT (arm_current_func_type ()); offset = 0; offset += update ? 1 : 0; @@ -17772,20 +17773,16 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, } conditional = reverse ? "%?%D0" : "%?%d0"; - if ((regno_base == SP_REGNUM) && update) - { - sprintf (pattern, "pop%s\t{", conditional); - } + /* Can't use POP if returning from an interrupt. */ + if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc)) + sprintf (pattern, "pop%s\t{", conditional); else { /* Output ldmfd when the base register is SP, otherwise output ldmia. It's just a convention, their semantics are identical. */ if (regno_base == SP_REGNUM) - /* update is never true here, hence there is no need to handle - pop here. */ - sprintf (pattern, "ldmfd%s", conditional); - - if (update) + sprintf (pattern, "ldmfd%s\t", conditional); + else if (update) sprintf (pattern, "ldmia%s\t", conditional); else sprintf (pattern, "ldm%s\t", conditional); @@ -17811,7 +17808,7 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, strcat (pattern, "}"); - if (IS_INTERRUPT (arm_current_func_type ()) && return_pc) + if (interrupt_p && return_pc) strcat (pattern, "^"); output_asm_insn (pattern, &cond); @@ -19622,8 +19619,12 @@ output_return_instruction (rtx operand, bool really_return, bool reverse, sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); } } + /* For interrupt returns we have to use an LDM rather than + a POP so that we can use the exception return variant. */ + else if (IS_INTERRUPT (func_type)) + sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional); else - sprintf (instr, "pop%s\t{", conditional); + sprintf (instr, "pop%s\t{", conditional); p = instr + strlen (instr); @@ -21461,7 +21462,11 @@ arm_expand_prologue (void) /* Naked functions don't have prologues. */ if (IS_NAKED (func_type)) - return; + { + if (flag_stack_usage_info) + current_function_static_stack_size = 0; + return; + } /* Make a copy of c_f_p_a_s as we may need to modify it locally. */ args_to_push = crtl->args.pretend_args_size; @@ -24715,7 +24720,11 @@ thumb1_expand_prologue (void) /* Naked functions don't have prologues. 
*/ if (IS_NAKED (func_type)) - return; + { + if (flag_stack_usage_info) + current_function_static_stack_size = 0; + return; + } if (IS_INTERRUPT (func_type)) { diff --git a/gcc/config/arm/freebsd.h b/gcc/config/arm/freebsd.h index 948fdd6843..0ade4e99be 100644 --- a/gcc/config/arm/freebsd.h +++ b/gcc/config/arm/freebsd.h @@ -120,6 +120,9 @@ #define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm9 #endif +/* FreeBSD 10 does not support unaligned access for armv6 and up. + Unaligned access support was added in FreeBSD 11. */ +#if FBSD_MAJOR < 11 #define SUBTARGET_OVERRIDE_INTERNAL_OPTIONS \ do { \ if (opts_set->x_unaligned_access == 1) \ @@ -127,6 +130,7 @@ do { \ if (opts->x_unaligned_access) \ opts->x_unaligned_access = 0; \ } while (0) +#endif #undef MAX_SYNC_LIBFUNC_SIZE #define MAX_SYNC_LIBFUNC_SIZE 4 /* UNITS_PER_WORD not defined yet. */ diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index a7728e3b10..0ae913c73b 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -203,9 +203,6 @@ static GTY(()) rtx xstring_e; /* Current architecture. */ const avr_arch_t *avr_arch; -/* Section to put switch tables in. */ -static GTY(()) section *progmem_swtable_section; - /* Unnamed sections associated to __attribute__((progmem)) aka. PROGMEM or to address space __flash* or __memx. Only used as singletons inside avr_asm_select_section, but it must not be local there because of GTY. */ @@ -9461,24 +9458,6 @@ avr_output_progmem_section_asm_op (const void *data) static void avr_asm_init_sections (void) { - /* Set up a section for jump tables. Alignment is handled by - ASM_OUTPUT_BEFORE_CASE_LABEL. */ - - if (AVR_HAVE_JMP_CALL) - { - progmem_swtable_section - = get_unnamed_section (0, output_section_asm_op, - "\t.section\t.progmem.gcc_sw_table" - ",\"a\",@progbits"); - } - else - { - progmem_swtable_section - = get_unnamed_section (SECTION_CODE, output_section_asm_op, - "\t.section\t.progmem.gcc_sw_table" - ",\"ax\",@progbits"); - } - /* Override section callbacks to keep track of `avr_need_clear_bss_p' resp. `avr_need_copy_data_p'. */ @@ -9488,65 +9467,6 @@ avr_asm_init_sections (void) } -/* Implement `TARGET_ASM_FUNCTION_RODATA_SECTION'. */ - -static section* -avr_asm_function_rodata_section (tree decl) -{ - /* If a function is unused and optimized out by -ffunction-sections - and --gc-sections, ensure that the same will happen for its jump - tables by putting them into individual sections. */ - - unsigned int flags; - section * frodata; - - /* Get the frodata section from the default function in varasm.c - but treat function-associated data-like jump tables as code - rather than as user defined data. AVR has no constant pools. */ - { - int fdata = flag_data_sections; - - flag_data_sections = flag_function_sections; - frodata = default_function_rodata_section (decl); - flag_data_sections = fdata; - flags = frodata->common.flags; - } - - if (frodata != readonly_data_section - && flags & SECTION_NAMED) - { - /* Adjust section flags and replace section name prefix. */ - - unsigned int i; - - static const char* const prefix[] = - { - ".rodata", ".progmem.gcc_sw_table", - ".gnu.linkonce.r.", ".gnu.linkonce.t." - }; - - for (i = 0; i < sizeof (prefix) / sizeof (*prefix); i += 2) - { - const char * old_prefix = prefix[i]; - const char * new_prefix = prefix[i+1]; - const char * name = frodata->named.name; - - if (STR_PREFIX_P (name, old_prefix)) - { - const char *rname = ACONCAT ((new_prefix, - name + strlen (old_prefix), NULL)); - flags &= ~SECTION_CODE; - flags |= AVR_HAVE_JMP_CALL ? 
0 : SECTION_CODE; - - return get_section (rname, flags, frodata->named.decl); - } - } - } - - return progmem_swtable_section; -} - - /* Implement `TARGET_ASM_NAMED_SECTION'. */ /* Track need of __do_clear_bss, __do_copy_data for named sections. */ @@ -9721,7 +9641,9 @@ avr_asm_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { const char *sname = ACONCAT ((new_prefix, name + strlen (old_prefix), NULL)); - return get_section (sname, sect->common.flags, sect->named.decl); + return get_section (sname, + sect->common.flags & ~SECTION_DECLARED, + sect->named.decl); } } @@ -13747,9 +13669,6 @@ avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg, #undef TARGET_FOLD_BUILTIN #define TARGET_FOLD_BUILTIN avr_fold_builtin -#undef TARGET_ASM_FUNCTION_RODATA_SECTION -#define TARGET_ASM_FUNCTION_RODATA_SECTION avr_asm_function_rodata_section - #undef TARGET_SCALAR_MODE_SUPPORTED_P #define TARGET_SCALAR_MODE_SUPPORTED_P avr_scalar_mode_supported_p diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 01da70867e..ab5e465114 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -391,7 +391,7 @@ typedef struct avr_args #define SUPPORTS_INIT_PRIORITY 0 -#define JUMP_TABLES_IN_TEXT_SECTION 0 +#define JUMP_TABLES_IN_TEXT_SECTION 1 #define ASM_COMMENT_START " ; " diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index c988446184..2a2b620cd1 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -112,12 +112,12 @@ (define_attr "length" "" (cond [(eq_attr "type" "branch") (if_then_else (and (ge (minus (pc) (match_dup 0)) - (const_int -63)) + (const_int -62)) (le (minus (pc) (match_dup 0)) (const_int 62))) (const_int 1) (if_then_else (and (ge (minus (pc) (match_dup 0)) - (const_int -2045)) + (const_int -2044)) (le (minus (pc) (match_dup 0)) (const_int 2045))) (const_int 2) @@ -641,6 +641,22 @@ if (avr_mem_flash_p (dest)) DONE; + if (QImode == mode + && SUBREG_P (src) + && CONSTANT_ADDRESS_P (SUBREG_REG (src)) + && can_create_pseudo_p()) + { + // store_bitfield may want to store a SYMBOL_REF or CONST in a + // structure that's represented as PSImode. As the upper 16 bits + // of PSImode cannot be expressed as an HImode subreg, the rhs is + // decomposed into QImode (word_mode) subregs of SYMBOL_REF, + // CONST or LABEL_REF; cf. PR71103. + + rtx const_addr = SUBREG_REG (src); + operands[1] = src = copy_rtx (src); + SUBREG_REG (src) = copy_to_mode_reg (GET_MODE (const_addr), const_addr); + } + /* One of the operands has to be in a register. */ if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) diff --git a/gcc/config/avr/gen-avr-mmcu-specs.c b/gcc/config/avr/gen-avr-mmcu-specs.c index de8680a8a1..fabe8c2c59 100644 --- a/gcc/config/avr/gen-avr-mmcu-specs.c +++ b/gcc/config/avr/gen-avr-mmcu-specs.c @@ -27,7 +27,7 @@ #include "avr-devices.c" -// Get rid of "defaults.h". We just need tm.h for `WITH_AVRLIBS' and +// Get rid of "defaults.h". We just need tm.h for `WITH_AVRLIBC' and // and `WITH_RTEMS'. 
*/ #define GCC_DEFAULTS_H @@ -242,12 +242,13 @@ print_mcu (const avr_mcu_t *mcu) fprintf (f, "*link_data_start:\n"); if (mcu->data_section_start != arch->default_data_section_start) - fprintf (f, "\t-Tdata 0x%lX", 0x800000UL + mcu->data_section_start); + fprintf (f, "\t%%{!Tdata:-Tdata 0x%lX}", + 0x800000UL + mcu->data_section_start); fprintf (f, "\n\n"); fprintf (f, "*link_text_start:\n"); if (mcu->text_section_start != 0x0) - fprintf (f, "\t-Ttext 0x%lX", 0UL + mcu->text_section_start); + fprintf (f, "\t%%{!Ttext:-Ttext 0x%lX}", 0UL + mcu->text_section_start); fprintf (f, "\n\n"); } diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index c9981b815c..0277cf2238 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -179,6 +179,7 @@ extern GTY(()) int darwin_ms_struct; %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \ %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1): \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \ + %{fcilkplus:%:include(libcilkrts.spec)%(link_cilkrts)} \ %{fgnu-tm: \ %{static|static-libgcc|static-libstdc++|static-libgfortran: libitm.a%s; : -litm } } \ %{!nostdlib:%{!nodefaultlibs:\ diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index f40a7d91df..6ef63e9fa4 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -270,9 +270,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcastb_epi8 (__m128i __A) { return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi)_mm512_undefined_si512(), - (__mmask64) - - 1); + (__v64qi)_mm512_undefined_epi32(), + (__mmask64) -1); } extern __inline __m512i @@ -318,8 +317,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcastw_epi16 (__m128i __A) { return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi)_mm512_undefined_si512(), - (__mmask32)-1); + (__v32hi)_mm512_undefined_epi32(), + (__mmask32) -1); } extern __inline __m512i @@ -588,8 +587,7 @@ _mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B) /* idx */ , (__v32hi) __A, (__v32hi) __B, - (__mmask32) - - 1); + (__mmask32) -1); } extern __inline __m512i @@ -2284,7 +2282,7 @@ _mm512_cmpneq_epu8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 4, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2293,7 +2291,7 @@ _mm512_cmplt_epu8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 1, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2302,7 +2300,7 @@ _mm512_cmpge_epu8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 5, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2311,7 +2309,7 @@ _mm512_cmple_epu8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 2, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask32 @@ -2320,7 +2318,7 @@ _mm512_cmpneq_epu16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 4, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2329,7 +2327,7 @@ _mm512_cmplt_epu16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 1, - 
(__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2338,7 +2336,7 @@ _mm512_cmpge_epu16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 5, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2347,7 +2345,7 @@ _mm512_cmple_epu16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 2, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask64 @@ -2356,7 +2354,7 @@ _mm512_cmpneq_epi8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 4, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2365,7 +2363,7 @@ _mm512_cmplt_epi8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 1, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2374,7 +2372,7 @@ _mm512_cmpge_epi8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 5, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask64 @@ -2383,7 +2381,7 @@ _mm512_cmple_epi8_mask (__m512i __X, __m512i __Y) { return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X, (__v64qi) __Y, 2, - (__mmask64) - 1); + (__mmask64) -1); } extern __inline __mmask32 @@ -2392,7 +2390,7 @@ _mm512_cmpneq_epi16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 4, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2401,7 +2399,7 @@ _mm512_cmplt_epi16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 1, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2410,7 +2408,7 @@ _mm512_cmpge_epi16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 5, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2419,7 +2417,7 @@ _mm512_cmple_epi16_mask (__m512i __X, __m512i __Y) { return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X, (__v32hi) __Y, 2, - (__mmask32) - 1); + (__mmask32) -1); } #ifdef __OPTIMIZE__ diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 14a4e8869a..1dbb6b04e4 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -41,8 +41,7 @@ _mm512_broadcast_f64x2 (__m128d __A) return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, _mm512_undefined_pd(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m512d @@ -72,9 +71,8 @@ _mm512_broadcast_i64x2 (__m128i __A) { return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, - _mm512_undefined_si512(), - (__mmask8) - - 1); + _mm512_undefined_epi32(), + (__mmask8) -1); } extern __inline __m512i @@ -104,8 +102,7 @@ _mm512_broadcast_f32x2 (__m128 __A) { return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, (__v16sf)_mm512_undefined_ps(), - (__mmask16) - - 1); + (__mmask16) -1); } extern __inline __m512 @@ -133,9 +130,8 @@ _mm512_broadcast_i32x2 (__m128i __A) { return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si)_mm512_undefined_si512(), - (__mmask16) - -1); + (__v16si)_mm512_undefined_epi32(), + (__mmask16) -1); } extern __inline __m512i @@ -165,8 +161,7 @@ _mm512_broadcast_f32x8 (__m256 __A) { return (__m512) 
__builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, _mm512_undefined_ps(), - (__mmask16) - - 1); + (__mmask16) -1); } extern __inline __m512 @@ -194,9 +189,8 @@ _mm512_broadcast_i32x8 (__m256i __A) { return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, - (__v16si)_mm512_undefined_si512(), - (__mmask16) - -1); + (__v16si)_mm512_undefined_epi32(), + (__mmask16) -1); } extern __inline __m512i @@ -1569,8 +1563,7 @@ _mm512_extractf64x2_pd (__m512d __A, const int __imm) __imm, (__v2df) _mm_setzero_pd (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128d @@ -1640,8 +1633,7 @@ _mm512_extracti64x2_epi64 (__m512i __A, const int __imm) __imm, (__v2di) _mm_setzero_di (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -1829,8 +1821,7 @@ _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm) __imm, (__v8di) _mm512_setzero_si512 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m512i @@ -1869,8 +1860,7 @@ _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm) __imm, (__v8df) _mm512_setzero_pd (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m512d @@ -1933,8 +1923,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) { return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A, __imm, - (__mmask16) - - 1); + (__mmask16) -1); } #else diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index e009d8c55b..305ed555d4 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -130,12 +130,14 @@ _mm512_undefined_pd (void) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_undefined_si512 (void) +_mm512_undefined_epi32 (void) { __m512i __Y = __Y; return __Y; } +#define _mm512_undefined_si512 _mm512_undefined_epi32 + extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_set1_epi8 (char __A) @@ -549,7 +551,7 @@ _mm512_sllv_epi32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, (__v16si) __Y, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -581,7 +583,7 @@ _mm512_srav_epi32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, (__v16si) __Y, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -613,7 +615,7 @@ _mm512_srlv_epi32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, (__v16si) __Y, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -733,7 +735,7 @@ _mm512_srav_epi64 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, (__v8di) __Y, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -765,7 +767,7 @@ _mm512_srlv_epi64 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, (__v8di) __Y, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -825,7 +827,7 @@ _mm512_mul_epi32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, (__v16si) __Y, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -884,7 +886,7 @@ _mm512_mul_epu32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, (__v16si) __Y, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ 
-915,7 +917,7 @@ _mm512_slli_epi64 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -941,7 +943,7 @@ _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) #else #define _mm512_slli_epi64(X, C) \ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ - (__v8di)(__m512i)_mm512_undefined_si512 (),\ + (__v8di)(__m512i)_mm512_undefined_epi32 (),\ (__mmask8)-1)) #define _mm512_mask_slli_epi64(W, U, X, C) \ @@ -962,7 +964,7 @@ _mm512_sll_epi64 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, (__v2di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -994,7 +996,7 @@ _mm512_srli_epi64 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1020,7 +1022,7 @@ _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) #else #define _mm512_srli_epi64(X, C) \ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ - (__v8di)(__m512i)_mm512_undefined_si512 (),\ + (__v8di)(__m512i)_mm512_undefined_epi32 (),\ (__mmask8)-1)) #define _mm512_mask_srli_epi64(W, U, X, C) \ @@ -1041,7 +1043,7 @@ _mm512_srl_epi64 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, (__v2di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1073,7 +1075,7 @@ _mm512_srai_epi64 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1099,7 +1101,7 @@ _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) #else #define _mm512_srai_epi64(X, C) \ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ - (__v8di)(__m512i)_mm512_undefined_si512 (),\ + (__v8di)(__m512i)_mm512_undefined_epi32 (),\ (__mmask8)-1)) #define _mm512_mask_srai_epi64(W, U, X, C) \ @@ -1120,7 +1122,7 @@ _mm512_sra_epi64 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, (__v2di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1152,7 +1154,7 @@ _mm512_slli_epi32 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1178,7 +1180,7 @@ _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) #else #define _mm512_slli_epi32(X, C) \ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ - (__v16si)(__m512i)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_slli_epi32(W, U, X, C) \ @@ -1199,7 +1201,7 @@ _mm512_sll_epi32 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, (__v4si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1231,7 +1233,7 @@ _mm512_srli_epi32 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1257,7 +1259,7 @@ _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i 
__A, unsigned int __B) #else #define _mm512_srli_epi32(X, C) \ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ - (__v16si)(__m512i)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_srli_epi32(W, U, X, C) \ @@ -1278,7 +1280,7 @@ _mm512_srl_epi32 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, (__v4si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1310,7 +1312,7 @@ _mm512_srai_epi32 (__m512i __A, unsigned int __B) { return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1336,7 +1338,7 @@ _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) #else #define _mm512_srai_epi32(X, C) \ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ - (__v16si)(__m512i)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_srai_epi32(W, U, X, C) \ @@ -1357,7 +1359,7 @@ _mm512_sra_epi32 (__m512i __A, __m128i __B) return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, (__v4si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1778,7 +1780,7 @@ _mm512_cvtepi8_epi32 (__m128i __A) { return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1807,7 +1809,7 @@ _mm512_cvtepi8_epi64 (__m128i __A) { return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1836,7 +1838,7 @@ _mm512_cvtepi16_epi32 (__m256i __A) { return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1865,7 +1867,7 @@ _mm512_cvtepi16_epi64 (__m128i __A) { return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1894,7 +1896,7 @@ _mm512_cvtepi32_epi64 (__m256i __X) { return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1923,7 +1925,7 @@ _mm512_cvtepu8_epi32 (__m128i __A) { return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -1952,7 +1954,7 @@ _mm512_cvtepu8_epi64 (__m128i __A) { return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -1981,7 +1983,7 @@ _mm512_cvtepu16_epi32 (__m256i __A) { return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -2010,7 +2012,7 @@ _mm512_cvtepu16_epi64 (__m128i __A) { return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -2039,7 +2041,7 @@ _mm512_cvtepu32_epi64 (__m256i __X) { return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -3407,7 +3409,7 @@ _mm512_abs_epi64 (__m512i __A) { return (__m512i) __builtin_ia32_pabsq512_mask 
((__v8di) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -3436,7 +3438,7 @@ _mm512_abs_epi32 (__m512i __A) { return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -3521,7 +3523,7 @@ _mm512_broadcastd_epi32 (__m128i __A) { return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -3549,7 +3551,7 @@ _mm512_set1_epi32 (int __A) { return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16)(-1)); } @@ -3577,7 +3579,7 @@ _mm512_broadcastq_epi64 (__m128i __A) { return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -3605,7 +3607,7 @@ _mm512_set1_epi64 (long long __A) { return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8)(-1)); } @@ -3662,7 +3664,7 @@ _mm512_broadcast_i32x4 (__m128i __A) { return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -3720,7 +3722,7 @@ _mm512_broadcast_i64x4 (__m256i __A) { return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -3841,7 +3843,7 @@ _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, __mask, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -3874,7 +3876,7 @@ _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm) return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, (__v8di) __B, __imm, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -3909,7 +3911,7 @@ _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm) (__v16si) __B, __imm, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -4009,7 +4011,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, #else #define _mm512_shuffle_epi32(X, C) \ ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ - (__v16si)(__m512i)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_shuffle_epi32(W, U, X, C) \ @@ -4025,7 +4027,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, #define _mm512_shuffle_i64x2(X, Y, C) \ ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ (__v8di)(__m512i)(Y), (int)(C),\ - (__v8di)(__m512i)_mm512_undefined_si512 (),\ + (__v8di)(__m512i)_mm512_undefined_epi32 (),\ (__mmask8)-1)) #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \ @@ -4043,7 +4045,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, #define _mm512_shuffle_i32x4(X, Y, C) \ ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ (__v16si)(__m512i)(Y), (int)(C),\ - (__v16si)(__m512i)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \ @@ -4102,7 +4104,7 @@ _mm512_rolv_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - 
_mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -4134,7 +4136,7 @@ _mm512_rorv_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -4166,7 +4168,7 @@ _mm512_rolv_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -4198,7 +4200,7 @@ _mm512_rorv_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -4390,7 +4392,7 @@ _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, __R); } @@ -4420,7 +4422,7 @@ _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, __R); } @@ -4445,7 +4447,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) } #else #define _mm512_cvtt_roundps_epi32(A, B) \ - ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) + ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B)) @@ -4454,7 +4456,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) #define _mm512_cvtt_roundps_epu32(A, B) \ - ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) + ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B)) @@ -4470,7 +4472,7 @@ _mm512_cvt_roundps_epi32 (__m512 __A, const int __R) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, __R); } @@ -4500,7 +4502,7 @@ _mm512_cvt_roundps_epu32 (__m512 __A, const int __R) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, __R); } @@ -4525,7 +4527,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) } #else #define _mm512_cvt_roundps_epi32(A, B) \ - ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) + ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B)) @@ -4534,7 +4536,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) #define _mm512_cvt_roundps_epu32(A, B) \ - ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) + ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) #define 
_mm512_mask_cvt_roundps_epu32(W, U, A, B) \ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B)) @@ -4903,7 +4905,6 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cvtsepi64_epi32 (__m512i __A) { - __v8si __O; return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) _mm256_undefined_si256 (), @@ -5556,7 +5557,7 @@ _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm) (__v4di) __B, __imm, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -5651,7 +5652,7 @@ _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, #define _mm512_inserti64x4(X, Y, C) \ ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ (__v4di)(__m256i) (Y), (int) (C), \ - (__v8di)(__m512i)_mm512_undefined_si512 (), \ + (__v8di)(__m512i)_mm512_undefined_epi32 (), \ (__mmask8)-1)) #define _mm512_mask_inserti64x4(W, U, X, Y, C) \ @@ -6177,7 +6178,7 @@ _mm512_permutex_epi64 (__m512i __X, const int __I) { return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) (-1)); } @@ -6248,7 +6249,7 @@ _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M) ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ (int)(I), \ (__v8di)(__m512i) \ - (_mm512_undefined_si512 ()),\ + (_mm512_undefined_epi32 ()),\ (__mmask8)(-1))) #define _mm512_maskz_permutex_epi64(M, X, I) \ @@ -6283,7 +6284,7 @@ _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, (__v8di) __X, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -6316,7 +6317,7 @@ _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, (__v16si) __X, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -6891,7 +6892,7 @@ _mm512_rol_epi32 (__m512i __A, const int __B) { return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -6920,7 +6921,7 @@ _mm512_ror_epi32 (__m512i __A, int __B) { return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -6949,7 +6950,7 @@ _mm512_rol_epi64 (__m512i __A, const int __B) { return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -6978,7 +6979,7 @@ _mm512_ror_epi64 (__m512i __A, int __B) { return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -7005,7 +7006,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) #define _mm512_rol_epi32(A, B) \ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ (int)(B), \ - (__v16si)_mm512_undefined_si512 (), \ + (__v16si)_mm512_undefined_epi32 (), \ (__mmask16)(-1))) #define _mm512_mask_rol_epi32(W, U, A, B) \ ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ @@ -7020,7 +7021,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) #define _mm512_ror_epi32(A, B) \ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ (int)(B), \ - (__v16si)_mm512_undefined_si512 (), \ + (__v16si)_mm512_undefined_epi32 (), \ (__mmask16)(-1))) 
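
/* A minimal usage sketch, not part of the patch: throughout this header
   the patch renames _mm512_undefined_si512 () to _mm512_undefined_epi32 (),
   and earlier in this file it adds
   "#define _mm512_undefined_si512 _mm512_undefined_epi32" so existing
   callers keep building.  The function name below is illustrative only;
   assumes GCC with -mavx512f.  */

#include <immintrin.h>

__m512i
undef_alias_demo (void)
{
  __m512i a = _mm512_undefined_epi32 ();  /* new, preferred spelling */
  __m512i b = _mm512_undefined_si512 ();  /* old spelling, kept as an alias */
  return _mm512_or_si512 (a, b);          /* contents are unspecified */
}
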
#define _mm512_mask_ror_epi32(W, U, A, B) \ ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ @@ -7035,7 +7036,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) #define _mm512_rol_epi64(A, B) \ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ (int)(B), \ - (__v8di)_mm512_undefined_si512 (), \ + (__v8di)_mm512_undefined_epi32 (), \ (__mmask8)(-1))) #define _mm512_mask_rol_epi64(W, U, A, B) \ ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ @@ -7051,7 +7052,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) #define _mm512_ror_epi64(A, B) \ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ (int)(B), \ - (__v8di)_mm512_undefined_si512 (), \ + (__v8di)_mm512_undefined_epi32 (), \ (__mmask8)(-1))) #define _mm512_mask_ror_epi64(W, U, A, B) \ ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ @@ -7134,7 +7135,7 @@ _mm512_andnot_si512 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -7145,7 +7146,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -7177,7 +7178,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -7275,7 +7276,7 @@ _mm512_unpackhi_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -7308,7 +7309,7 @@ _mm512_unpackhi_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -7340,7 +7341,7 @@ _mm512_unpacklo_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -7373,7 +7374,7 @@ _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -8512,7 +8513,7 @@ _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, (__v16si) __B, __imm, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -8546,7 +8547,7 @@ _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, (__v8di) __B, __imm, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -8575,7 +8576,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, #else #define _mm512_alignr_epi32(X, Y, C) \ ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ - (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\ + (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\ (__mmask16)-1)) #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \ @@ -8590,7 +8591,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i 
__B, #define _mm512_alignr_epi64(X, Y, C) \ ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ - (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \ + (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \ (__mmask8)-1)) #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \ @@ -9130,9 +9131,9 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, (__mmask8)-1)) #define _mm512_cmp_epi32_mask(X, Y, P) \ - ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ - (__v16si)(__m512i)(Y), (int)(P),\ - (__mmask16)-1)) + ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P), \ + (__mmask16)-1)) #define _mm512_cmp_epu64_mask(X, Y, P) \ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ @@ -9140,66 +9141,66 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, (__mmask8)-1)) #define _mm512_cmp_epu32_mask(X, Y, P) \ - ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ - (__v16si)(__m512i)(Y), (int)(P),\ - (__mmask16)-1)) + ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P), \ + (__mmask16)-1)) -#define _mm512_cmp_round_pd_mask(X, Y, P, R) \ +#define _mm512_cmp_round_pd_mask(X, Y, P, R) \ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ (__v8df)(__m512d)(Y), (int)(P),\ (__mmask8)-1, R)) -#define _mm512_cmp_round_ps_mask(X, Y, P, R) \ +#define _mm512_cmp_round_ps_mask(X, Y, P, R) \ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ (__v16sf)(__m512)(Y), (int)(P),\ (__mmask16)-1, R)) -#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ +#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ (__v8di)(__m512i)(Y), (int)(P),\ (__mmask8)M)) -#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ - ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ - (__v16si)(__m512i)(Y), (int)(P),\ - (__mmask16)M)) +#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P), \ + (__mmask16)M)) -#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ +#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ (__v8di)(__m512i)(Y), (int)(P),\ (__mmask8)M)) -#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ - ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ - (__v16si)(__m512i)(Y), (int)(P),\ - (__mmask16)M)) +#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ + (__v16si)(__m512i)(Y), (int)(P), \ + (__mmask16)M)) -#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ +#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ (__v8df)(__m512d)(Y), (int)(P),\ (__mmask8)M, R)) -#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ +#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ (__v16sf)(__m512)(Y), (int)(P),\ (__mmask16)M, R)) -#define _mm_cmp_round_sd_mask(X, Y, P, R) \ +#define _mm_cmp_round_sd_mask(X, Y, P, R) \ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P),\ (__mmask8)-1, R)) -#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ +#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ ((__mmask8) 
__builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), (int)(P),\ (M), R)) -#define _mm_cmp_round_ss_mask(X, Y, P, R) \ +#define _mm_cmp_round_ss_mask(X, Y, P, R) \ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (__mmask8)-1, R)) -#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ +#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), (int)(P), \ (M), R)) @@ -9306,7 +9307,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale) { - __m512i v1_old = _mm512_undefined_si512 (); + __m512i v1_old = _mm512_undefined_epi32 (); __mmask16 mask = 0xFFFF; return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old, @@ -9330,7 +9331,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale) { - __m512i v1_old = _mm512_undefined_si512 (); + __m512i v1_old = _mm512_undefined_epi32 (); __mmask8 mask = 0xFF; return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old, @@ -9379,7 +9380,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale) { - __m512i v1_old = _mm512_undefined_si512 (); + __m512i v1_old = _mm512_undefined_epi32 (); __mmask8 mask = 0xFF; return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old, @@ -9591,7 +9592,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, (__mmask8)MASK, (int)SCALE) #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \ + (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \ (int const *)ADDR, \ (__v16si)(__m512i)INDEX, \ (__mmask16)0xFFFF, (int)SCALE) @@ -9603,7 +9604,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, (__mmask16)MASK, (int)SCALE) #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \ + (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \ (long long const *)ADDR, \ (__v8si)(__m256i)INDEX, \ (__mmask8)0xFF, (int)SCALE) @@ -9627,7 +9628,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, (__mmask8)MASK, (int)SCALE) #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ - (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \ + (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \ (long long const *)ADDR, \ (__v8di)(__m512i)INDEX, \ (__mmask8)0xFF, (int)SCALE) @@ -10123,7 +10124,7 @@ _mm512_max_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -10154,7 +10155,7 @@ _mm512_min_epi64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -10185,7 +10186,7 @@ _mm512_max_epu64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -10216,7 +10217,7 @@ 
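
[Editor's note] Two fixes run through the avx512fintrin.h hunks above. First, every don't-care destination operand now uses _mm512_undefined_epi32 (), the name Intel documents for an uninitialized 512-bit integer vector, instead of _mm512_undefined_si512 (). Second, the -O0 macro forms of the 32-bit-element compares previously cast the builtin's result to __mmask8, silently dropping lanes 8-15; a 512-bit vector holds sixteen dwords, so the result must be __mmask16. A minimal sketch of why the mask width matters (function name is illustrative; compiles with GCC):

    #include <immintrin.h>

    __attribute__ ((target ("avx512f")))
    unsigned int
    count_equal_dwords (__m512i a, __m512i b)
    {
      /* One mask bit per 32-bit lane: sixteen bits in total, so a
         __mmask8 result would lose the upper eight comparisons.  */
      __mmask16 m = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_EQ);
      return __builtin_popcount ((unsigned int) m);
    }
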
_mm512_min_epu64 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, (__v8di) __B, (__v8di) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask8) -1); } @@ -10247,7 +10248,7 @@ _mm512_max_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -10278,7 +10279,7 @@ _mm512_min_epi32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -10309,7 +10310,7 @@ _mm512_max_epu32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -10340,7 +10341,7 @@ _mm512_min_epu32 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, (__v16si) __B, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1); } @@ -11804,7 +11805,7 @@ _mm512_cvttps_epi32 (__m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } @@ -11836,7 +11837,7 @@ _mm512_cvttps_epu32 (__m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } @@ -11868,7 +11869,7 @@ _mm512_cvtps_epi32 (__m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } @@ -11900,7 +11901,7 @@ _mm512_cvtps_epu32 (__m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, (__v16si) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h index c2f43111e1..c50aa65d02 100644 --- a/gcc/config/i386/avx512ifmaintrin.h +++ b/gcc/config/i386/avx512ifmaintrin.h @@ -41,7 +41,7 @@ _mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m512i @@ -51,7 +51,7 @@ _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m512i diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h index 9091f899ca..6c496f78ce 100644 --- a/gcc/config/i386/avx512ifmavlintrin.h +++ b/gcc/config/i386/avx512ifmavlintrin.h @@ -41,7 +41,7 @@ _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m128i @@ -51,7 +51,7 @@ _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m256i @@ -61,7 +61,7 @@ _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) return (__m256i) 
__builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, (__v4di) __Y, (__v4di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m256i @@ -71,7 +71,7 @@ _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, (__v4di) __Y, (__v4di) __Z, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m128i diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h index a00cf70f52..a2ad07a6a2 100644 --- a/gcc/config/i386/avx512vbmiintrin.h +++ b/gcc/config/i386/avx512vbmiintrin.h @@ -62,7 +62,7 @@ _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y) return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, (__v64qi) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask64) -1); } @@ -73,7 +73,7 @@ _mm512_permutexvar_epi8 (__m512i __A, __m512i __B) return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, (__v64qi) __A, (__v64qi) - _mm512_undefined_si512 (), + _mm512_undefined_epi32 (), (__mmask64) -1); } @@ -108,8 +108,7 @@ _mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B) /* idx */ , (__v64qi) __A, (__v64qi) __B, - (__mmask64) - - 1); + (__mmask64) -1); } extern __inline __m512i diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h index 4af9fb9b01..04308ead42 100644 --- a/gcc/config/i386/avx512vbmivlintrin.h +++ b/gcc/config/i386/avx512vbmivlintrin.h @@ -173,8 +173,7 @@ _mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B) /* idx */ , (__v32qi) __A, (__v32qi) __B, - (__mmask32) - - 1); + (__mmask32) -1); } extern __inline __m256i @@ -224,8 +223,7 @@ _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B) /* idx */ , (__v16qi) __A, (__v16qi) __B, - (__mmask16) - - 1); + (__mmask16) -1); } extern __inline __m128i diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index f260526d06..cf94755ea3 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -575,8 +575,7 @@ _mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B) /* idx */ , (__v16hi) __A, (__v16hi) __B, - (__mmask16) - - 1); + (__mmask16) -1); } extern __inline __m256i @@ -626,8 +625,7 @@ _mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B) /* idx */ , (__v8hi) __A, (__v8hi) __B, - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -2009,7 +2007,7 @@ _mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 4, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2018,7 +2016,7 @@ _mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 1, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2027,7 +2025,7 @@ _mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 5, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -2036,7 +2034,7 @@ _mm256_cmple_epi8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 2, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask16 @@ -2045,7 +2043,7 @@ _mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 4, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 
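
[Editor's note] The long run of (__mmask8) - 1 → (__mmask8) -1 changes here and in the surrounding headers is whitespace-only: both spellings parse as a cast applied to unary minus one, producing the all-ones "every lane active" mask that the unmasked intrinsics hand to the underlying *_mask builtins. A stand-alone check of that claim (plain C, no AVX-512 required; mask8 is a stand-in typedef, not from the headers):

    #include <assert.h>

    typedef unsigned char mask8;   /* stand-in for __mmask8 */

    int
    main (void)
    {
      /* "(mask8) - 1" cannot be a binary subtraction, because "(mask8)"
         is a type name, not an expression; both spellings are the cast
         of -1, i.e. 0xFF for an 8-bit unsigned type.  */
      assert (((mask8) - 1) == 0xFF);
      assert (((mask8) -1) == 0xFF);
      return 0;
    }
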
@@ -2054,7 +2052,7 @@ _mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 1, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2063,7 +2061,7 @@ _mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 5, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2072,7 +2070,7 @@ _mm256_cmple_epi16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 2, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2081,7 +2079,7 @@ _mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 4, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2090,7 +2088,7 @@ _mm_cmplt_epu8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 1, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2099,7 +2097,7 @@ _mm_cmpge_epu8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 5, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2108,7 +2106,7 @@ _mm_cmple_epu8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 2, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask8 @@ -2117,7 +2115,7 @@ _mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2126,7 +2124,7 @@ _mm_cmplt_epu16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2135,7 +2133,7 @@ _mm_cmpge_epu16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2144,7 +2142,7 @@ _mm_cmple_epu16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask16 @@ -2153,7 +2151,7 @@ _mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 4, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2162,7 +2160,7 @@ _mm_cmplt_epi8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 1, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2171,7 +2169,7 @@ _mm_cmpge_epi8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 5, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -2180,7 +2178,7 @@ _mm_cmple_epi8_mask (__m128i __X, __m128i __Y) { return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X, (__v16qi) __Y, 2, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask8 @@ -2189,7 +2187,7 @@ _mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2198,7 +2196,7 @@ _mm_cmplt_epi16_mask (__m128i 
__X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2207,7 +2205,7 @@ _mm_cmpge_epi16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -2216,7 +2214,7 @@ _mm_cmple_epi16_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X, (__v8hi) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __m256i @@ -3609,7 +3607,7 @@ _mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 4, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -3618,7 +3616,7 @@ _mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 1, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -3627,7 +3625,7 @@ _mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 5, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask32 @@ -3636,7 +3634,7 @@ _mm256_cmple_epu8_mask (__m256i __X, __m256i __Y) { return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X, (__v32qi) __Y, 2, - (__mmask32) - 1); + (__mmask32) -1); } extern __inline __mmask16 @@ -3645,7 +3643,7 @@ _mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 4, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -3654,7 +3652,7 @@ _mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 1, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -3663,7 +3661,7 @@ _mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 5, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline __mmask16 @@ -3672,7 +3670,7 @@ _mm256_cmple_epu16_mask (__m256i __X, __m256i __Y) { return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X, (__v16hi) __Y, 2, - (__mmask16) - 1); + (__mmask16) -1); } extern __inline void diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index 697b81c401..5ff0a52639 100644 --- a/gcc/config/i386/avx512vldqintrin.h +++ b/gcc/config/i386/avx512vldqintrin.h @@ -389,8 +389,7 @@ _mm256_broadcast_f64x2 (__m128d __A) return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, (__v4df)_mm256_undefined_pd(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256d @@ -421,8 +420,7 @@ _mm256_broadcast_i64x2 (__m128i __A) return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, (__v4di)_mm256_undefined_si256(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -452,8 +450,7 @@ _mm256_broadcast_f32x2 (__m128 __A) { return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, (__v8sf)_mm256_undefined_ps(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256 @@ -482,8 +479,7 @@ _mm256_broadcast_i32x2 (__m128i __A) return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, (__v8si)_mm256_undefined_si256(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -514,8 +510,7 @@ _mm_broadcast_i32x2 (__m128i __A) return (__m128i) 
__builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, (__v4si)_mm_undefined_si128(), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -1351,8 +1346,7 @@ _mm256_extractf64x2_pd (__m256d __A, const int __imm) __imm, (__v2df) _mm_setzero_pd (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128d @@ -1388,8 +1382,7 @@ _mm256_extracti64x2_epi64 (__m256i __A, const int __imm) __imm, (__v2di) _mm_setzero_di (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -1743,8 +1736,7 @@ _mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm) __imm, (__v4di) _mm256_setzero_si256 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -1783,8 +1775,7 @@ _mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm) __imm, (__v4df) _mm256_setzero_pd (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256d diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index d0ffb2b4d3..d627ad86da 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -2698,8 +2698,7 @@ _mm256_broadcast_f32x4 (__m128 __A) { return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, (__v8sf)_mm256_undefined_pd (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256 @@ -2728,8 +2727,7 @@ _mm256_broadcast_i32x4 (__m128i __A) return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, (__v8si)_mm256_undefined_si256 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -6293,8 +6291,7 @@ _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) /* idx */ , (__v4df) __A, (__v4df) __B, - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256d @@ -6584,8 +6581,7 @@ _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) /* idx */ , (__v2df) __A, (__v2df) __B, - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128d @@ -8259,8 +8255,7 @@ _mm256_conflict_epi64 (__m256i __A) return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, (__v4di) _mm256_setzero_si256 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -8291,8 +8286,7 @@ _mm256_conflict_epi32 (__m256i __A) return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, (__v8si) _mm256_setzero_si256 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -8381,8 +8375,7 @@ _mm_conflict_epi64 (__m128i __A) return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, (__v2di) _mm_setzero_di (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -8413,8 +8406,7 @@ _mm_conflict_epi32 (__m128i __A) return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, (__v4si) _mm_setzero_si128 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -9291,8 +9283,7 @@ _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm) __imm, (__v8si) _mm256_setzero_si256 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m256i @@ -9367,8 +9358,7 @@ _mm256_extracti32x4_epi32 (__m256i __A, const int __imm) __imm, (__v4si) _mm_setzero_si128 (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128i @@ -9404,8 +9394,7 @@ _mm256_extractf32x4_ps (__m256 __A, const int __imm) __imm, (__v4sf) _mm_setzero_ps (), - (__mmask8) - - 1); + (__mmask8) -1); } extern __inline __m128 @@ -11797,7 +11786,7 @@ _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, (__v8si) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern 
__inline __mmask8 @@ -11815,7 +11804,7 @@ _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, (__v8si) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11833,7 +11822,7 @@ _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, (__v8si) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11851,7 +11840,7 @@ _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X, (__v8si) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11869,7 +11858,7 @@ _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, (__v4di) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11887,7 +11876,7 @@ _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, (__v4di) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11905,7 +11894,7 @@ _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, (__v4di) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11923,7 +11912,7 @@ _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X, (__v4di) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11941,7 +11930,7 @@ _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, (__v8si) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11959,7 +11948,7 @@ _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, (__v8si) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11977,7 +11966,7 @@ _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, (__v8si) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -11995,7 +11984,7 @@ _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X, (__v8si) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12013,7 +12002,7 @@ _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, (__v4di) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12031,7 +12020,7 @@ _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, (__v4di) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12049,7 +12038,7 @@ _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, (__v4di) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12067,7 +12056,7 @@ _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y) { return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X, (__v4di) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12085,7 +12074,7 @@ _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, (__v4si) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12103,7 +12092,7 @@ 
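
[Editor's note] For orientation while reading these compare hunks: the bare integer third argument of the __builtin_ia32_[u]cmp* builtins is the VPCMP/VPCMPU predicate immediate, which is why _mm*_cmplt_* passes 1, _mm*_cmple_* 2, _mm*_cmpneq_* 4 and _mm*_cmpge_* 5. A sketch of the full encoding (enumerator names are illustrative; the values follow the AVX-512 ISA):

    /* VPCMP/VPCMPU predicate immediates used by the compare builtins.  */
    enum vpcmp_predicate
    {
      VPCMP_EQ    = 0,   /* equal                                 */
      VPCMP_LT    = 1,   /* _mm*_cmplt_*                          */
      VPCMP_LE    = 2,   /* _mm*_cmple_*                          */
      VPCMP_FALSE = 3,   /* always zero                           */
      VPCMP_NE    = 4,   /* _mm*_cmpneq_*                         */
      VPCMP_NLT   = 5,   /* not less-than, i.e. _mm*_cmpge_*      */
      VPCMP_NLE   = 6,   /* not less-or-equal, i.e. _mm*_cmpgt_*  */
      VPCMP_TRUE  = 7    /* always one                            */
    };
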
_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, (__v4si) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12121,7 +12110,7 @@ _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, (__v4si) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12139,7 +12128,7 @@ _mm_cmple_epu32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X, (__v4si) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12157,7 +12146,7 @@ _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, (__v2di) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12175,7 +12164,7 @@ _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, (__v2di) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12193,7 +12182,7 @@ _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, (__v2di) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12211,7 +12200,7 @@ _mm_cmple_epu64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X, (__v2di) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12229,7 +12218,7 @@ _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, (__v4si) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12247,7 +12236,7 @@ _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, (__v4si) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12265,7 +12254,7 @@ _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, (__v4si) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12283,7 +12272,7 @@ _mm_cmple_epi32_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X, (__v4si) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12301,7 +12290,7 @@ _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, (__v2di) __Y, 4, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12319,7 +12308,7 @@ _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, (__v2di) __Y, 1, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12337,7 +12326,7 @@ _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, (__v2di) __Y, 5, - (__mmask8) - 1); + (__mmask8) -1); } extern __inline __mmask8 @@ -12355,7 +12344,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) { return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X, (__v2di) __Y, 2, - (__mmask8) - 1); + (__mmask8) -1); } #else diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index afdc546c06..1a4c701ad1 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -141,8 +141,12 @@ "(ix86_fpmath & FPMATH_387) ? 
FLOAT_REGS : NO_REGS" "@internal Any x87 register when 80387 FP arithmetic is enabled.") +;; Yr constraint is meant to be used in noavx contexts only, for VEX and EVEX +;; the lower register numbers need the same instruction sizes as any other. +;; In case Yr constraint is misused, try to limit the damage, by treating +;; it as x constraint in avx mode, not v constraint. (define_register_constraint "Yr" - "TARGET_SSE ? (X86_TUNE_AVOID_4BYTE_PREFIXES ? NO_REX_SSE_REGS : ALL_SSE_REGS) : NO_REGS" + "TARGET_SSE ? ((TARGET_AVOID_4BYTE_PREFIXES && !TARGET_AVX) ? NO_REX_SSE_REGS : SSE_REGS) : NO_REGS" "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.") ;; We use the B prefix to denote any number of internal operands: diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index b12146663e..a9d5135f67 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -637,33 +637,27 @@ const char *host_detect_local_cpu (int argc, const char **argv) } else if (vendor == signature_CENTAUR_ebx) { - if (arch) + processor = PROCESSOR_GENERIC; + + switch (family) { - switch (family) - { - case 6: - if (model > 9) - /* Use the default detection procedure. */ - processor = PROCESSOR_GENERIC; - else if (model == 9) - cpu = "c3-2"; - else if (model >= 6) - cpu = "c3"; - else - processor = PROCESSOR_GENERIC; - break; - case 5: - if (has_3dnow) - cpu = "winchip2"; - else if (has_mmx) - cpu = "winchip2-c6"; - else - processor = PROCESSOR_GENERIC; - break; - default: - /* We have no idea. */ - processor = PROCESSOR_GENERIC; - } + default: + /* We have no idea. */ + break; + + case 5: + if (has_3dnow || has_mmx) + processor = PROCESSOR_I486; + break; + + case 6: + if (model > 9 || has_longmode) + /* Use the default detection procedure. */ + ; + else if (model == 9) + processor = PROCESSOR_PENTIUMPRO; + else if (model >= 6) + processor = PROCESSOR_I486; } } else @@ -694,7 +688,18 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Default. */ break; case PROCESSOR_I486: - cpu = "i486"; + if (arch && vendor == signature_CENTAUR_ebx) + { + if (model >= 6) + cpu = "c3"; + else if (has_3dnow) + cpu = "winchip2"; + else + /* Assume WinChip C6. */ + cpu = "winchip-c6"; + } + else + cpu = "i486"; break; case PROCESSOR_PENTIUM: if (arch && has_mmx) @@ -817,8 +822,13 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* It is Pentium M. */ cpu = "pentium-m"; else if (has_sse) - /* It is Pentium III. */ - cpu = "pentium3"; + { + if (vendor == signature_CENTAUR_ebx) + cpu = "c3-2"; + else + /* It is Pentium III. */ + cpu = "pentium3"; + } else if (has_mmx) /* It is Pentium II. */ cpu = "pentium2"; @@ -902,6 +912,11 @@ const char *host_detect_local_cpu (int argc, const char **argv) else cpu = "prescott"; } + else if (has_longmode) + /* Perhaps some emulator? Assume x86-64, otherwise gcc + -march=native would be unusable for 64-bit compilations, + as all the CPUs below are 32-bit only. 
*/ + cpu = "x86-64"; else if (has_sse2) cpu = "pentium4"; else if (has_cmov) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index b892f08679..86139db4ab 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -292,6 +292,7 @@ DEF_FUNCTION_TYPE (V8DF, V4DF) DEF_FUNCTION_TYPE (V8DF, V2DF) DEF_FUNCTION_TYPE (V16SI, V4SI) DEF_FUNCTION_TYPE (V16SI, V8SI) +DEF_FUNCTION_TYPE (V16SI, V16SF) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI) DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI) DEF_FUNCTION_TYPE (V8DI, PV8DI) @@ -1035,14 +1036,17 @@ DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT) DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V8DF_FTYPE_V8DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V16SF_FTYPE_V16SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V2DF_V2DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V4DF_V4DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V8DF_V8DF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SF, ROUND) +DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V16SF, ROUND) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST) DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3d044e8bd6..c1bdcc7ee2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18787,12 +18787,29 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) of the register, once we have that information we may be able to handle some of them more efficiently. */ if (can_create_pseudo_p () - && register_operand (op0, mode) && (CONSTANT_P (op1) || (SUBREG_P (op1) && CONSTANT_P (SUBREG_REG (op1)))) - && !standard_sse_constant_p (op1)) - op1 = validize_mem (force_const_mem (mode, op1)); + && ((register_operand (op0, mode) + && !standard_sse_constant_p (op1)) + /* ix86_expand_vector_move_misalign() does not like constants. */ + || (SSE_REG_MODE_P (mode) + && MEM_P (op0) + && MEM_ALIGN (op0) < align))) + { + if (SUBREG_P (op1)) + { + machine_mode imode = GET_MODE (SUBREG_REG (op1)); + rtx r = force_const_mem (imode, SUBREG_REG (op1)); + if (r) + r = validize_mem (r); + else + r = force_reg (imode, SUBREG_REG (op1)); + op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); + } + else + op1 = validize_mem (force_const_mem (mode, op1)); + } /* We need to check memory alignment for SSE mode since attribute can make operands unaligned. */ @@ -18803,13 +18820,8 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) { rtx tmp[2]; - /* ix86_expand_vector_move_misalign() does not like constants ... */ - if (CONSTANT_P (op1) - || (SUBREG_P (op1) - && CONSTANT_P (SUBREG_REG (op1)))) - op1 = validize_mem (force_const_mem (mode, op1)); - - /* ... nor both arguments in memory. */ + /* ix86_expand_vector_move_misalign() does not like both + arguments in memory. 
*/ if (!register_operand (op0, mode) && !register_operand (op1, mode)) op1 = force_reg (mode, op1); @@ -18895,7 +18907,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) m = adjust_address (op0, mode, 0); emit_insn (extract (m, op1, const0_rtx)); m = adjust_address (op0, mode, 16); - emit_insn (extract (m, op1, const1_rtx)); + emit_insn (extract (m, copy_rtx (op1), const1_rtx)); } else emit_insn (store_unaligned (op0, op1)); @@ -19203,7 +19215,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) m = adjust_address (op0, V2SFmode, 0); emit_insn (gen_sse_storelps (m, op1)); m = adjust_address (op0, V2SFmode, 8); - emit_insn (gen_sse_storehps (m, op1)); + emit_insn (gen_sse_storehps (m, copy_rtx (op1))); } } } @@ -23027,17 +23039,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) switch (code) { case EQ: - return 0x08; + return 0x00; case NE: return 0x04; case GT: - return 0x16; + return 0x0e; case LE: - return 0x1a; + return 0x02; case GE: - return 0x15; + return 0x0d; case LT: - return 0x19; + return 0x01; + case UNLE: + return 0x0a; + case UNLT: + return 0x09; + case UNGE: + return 0x05; + case UNGT: + return 0x06; + case UNEQ: + return 0x18; + case LTGT: + return 0x0c; + case ORDERED: + return 0x07; + case UNORDERED: + return 0x03; default: gcc_unreachable (); } @@ -30686,7 +30714,7 @@ enum ix86_builtins IX86_BUILTIN_CVTPD2PS512, IX86_BUILTIN_CVTPD2UDQ512, IX86_BUILTIN_CVTPH2PS512, - IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_CVTPS2DQ512_MASK, IX86_BUILTIN_CVTPS2PD512, IX86_BUILTIN_CVTPS2PH512, IX86_BUILTIN_CVTPS2UDQ512, @@ -32126,14 +32154,25 @@ enum ix86_builtins IX86_BUILTIN_COPYSIGNQ, /* Vectorizer support builtins. */ - IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS, IX86_BUILTIN_CPYSGNPD, IX86_BUILTIN_CPYSGNPS256, IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD256, IX86_BUILTIN_CPYSGNPD512, + IX86_BUILTIN_FLOORPS512, + IX86_BUILTIN_FLOORPD512, + IX86_BUILTIN_CEILPS512, + IX86_BUILTIN_CEILPD512, + IX86_BUILTIN_TRUNCPS512, + IX86_BUILTIN_TRUNCPD512, + IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_VEC_PACK_SFIX512, + IX86_BUILTIN_FLOORPS_SFIX512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, + IX86_BUILTIN_CEILPS_SFIX512, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, + IX86_BUILTIN_ROUNDPS_AZ_SFIX512, IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, @@ -32787,9 +32826,9 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) 
UCHAR_FTYPE_UINT64_UINT_UINT }, /* FSGSBASE */ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, @@ -33724,12 +33763,12 @@ static const struct builtin_description bdesc_args[] = /* BMI */ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, /* TBM */ { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, /* F16C */ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI }, @@ -33739,11 +33778,11 @@ static const struct builtin_description bdesc_args[] = /* BMI2 */ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, /* AVX512F */ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI }, @@ -33948,6 +33987,17 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF }, { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, @@ -34864,7 +34914,7 @@ static const struct builtin_description bdesc_round_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT }, @@ -38441,10 +38491,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, { case V2DF_FTYPE_V2DF_ROUND: case V4DF_FTYPE_V4DF_ROUND: + case V8DF_FTYPE_V8DF_ROUND: case V4SF_FTYPE_V4SF_ROUND: case V8SF_FTYPE_V8SF_ROUND: + case V16SF_FTYPE_V16SF_ROUND: case V4SI_FTYPE_V4SF_ROUND: case V8SI_FTYPE_V8SF_ROUND: + case V16SI_FTYPE_V16SF_ROUND: return ix86_expand_sse_round (d, exp, target); case V4SI_FTYPE_V2DF_V2DF_ROUND: case V8SI_FTYPE_V4DF_V4DF_ROUND: @@ -38558,6 +38611,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V8SI: case V16SF_FTYPE_V4SF: case V16SI_FTYPE_V4SI: + case V16SI_FTYPE_V16SF: case V16SF_FTYPE_V16SF: case V8DI_FTYPE_UQI: case V8DF_FTYPE_V4DF: @@ -39315,6 +39369,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case 5: pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, args[2].op, args[3].op, args[4].op); + break; case 6: pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, args[2].op, args[3].op, args[4].op, @@ -39689,6 +39744,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, case 5: pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op, args[4].op); + break; case 6: pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op, args[4].op, @@ -42263,6 +42319,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); } break; @@ -42288,6 +42346,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); } break; @@ -42300,6 +42360,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); else if (out_n == 8 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); } if (out_mode == SImode && in_mode == SFmode) { @@ -42307,6 +42369,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); } break; @@ -42332,6 +42396,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); } break; @@ -42346,6 
+42412,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42353,6 +42421,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); } break; @@ -42367,6 +42437,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_CEILPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42374,6 +42446,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS512); } break; @@ -42388,6 +42462,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42395,6 +42471,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); } break; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d0b418b0fd..ec306f37d3 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -465,6 +465,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SLOW_PSHUFB] #define TARGET_VECTOR_PARALLEL_EXECUTION \ ix86_tune_features[X86_TUNE_VECTOR_PARALLEL_EXECUTION] +#define TARGET_AVOID_4BYTE_PREFIXES \ + ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES] #define TARGET_FUSE_CMP_AND_BRANCH_32 \ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_32] #define TARGET_FUSE_CMP_AND_BRANCH_64 \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index cf29e5d39d..7fbbea619e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3089,7 +3089,7 @@ (match_operand:TF 1 "general_operand" "C ,xm,x,*roF,*rC"))] "(TARGET_64BIT || TARGET_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && (!can_create_pseudo_p () + && (lra_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) || (optimize_function_for_size_p (cfun) @@ -3167,7 +3167,7 @@ (match_operand:XF 1 "general_operand" "fm,f,G,roF,r , *roF,*r,F ,C,roF,rF"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && (!can_create_pseudo_p () + && (lra_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) || (optimize_function_for_size_p (cfun) @@ -3240,7 +3240,7 @@ (match_operand:DF 1 
"general_operand" "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r ,roF,rF,rmF,rC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && (!can_create_pseudo_p () + && (lra_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) || (optimize_function_for_size_p (cfun) @@ -3442,7 +3442,7 @@ (match_operand:SF 1 "general_operand" "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r ,rmF,rF"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && (!can_create_pseudo_p () + && (lra_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) || (optimize_function_for_size_p (cfun) @@ -9332,7 +9332,7 @@ }) (define_split - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "general_reg_operand") (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand:V4SF 2)) (clobber (reg:CC FLAGS_REG))] @@ -9356,7 +9356,7 @@ }) (define_split - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "general_reg_operand") (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] @@ -9394,7 +9394,7 @@ }) (define_split - [(set (match_operand:XF 0 "register_operand") + [(set (match_operand:XF 0 "general_reg_operand") (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] @@ -11078,20 +11078,19 @@ (const_int 1)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (ior:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_iordi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11103,20 +11102,19 @@ (const_int 0)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (and:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode); - if (i >= 32) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_anddi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11129,20 +11127,19 @@ (match_dup 0) (const_int 1) (match_dup 1)))) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (xor:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_xordi3 (operands[0], operands[0], op1)); - DONE; }) (define_insn "*bt" @@ 
-11859,8 +11856,7 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) - && ! (GET_CODE (operands[4]) == CLOBBER - && reg_mentioned_p (operands[3], operands[4]))" + && ! reg_set_p (operands[3], operands[4])" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) @@ -11904,8 +11900,7 @@ "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) - && ! (GET_CODE (operands[4]) == CLOBBER - && reg_mentioned_p (operands[3], operands[4]))" + && ! reg_set_p (operands[3], operands[4])" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 14e80d9b48..93dda7bb0e 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -637,7 +637,7 @@ (op, mode == VOIDmode ? mode : Pmode)") (match_operand 0 "call_register_no_elim_operand") (ior (and (not (match_test "TARGET_X32")) - (match_operand 0 "sibcall_memory_operand")) + (match_operand 0 "memory_operand")) (and (match_test "TARGET_X32 && Pmode == DImode") (match_operand 0 "GOT_memory_operand"))))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1ffb3b9a86..42506efc52 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4488,7 +4488,7 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) -(define_insn "avx512f_fix_notruncv16sfv16si" +(define_insn "avx512f_fix_notruncv16sfv16si" [(set (match_operand:V16SI 0 "register_operand" "=v") (unspec:V16SI [(match_operand:V16SF 1 "" "")] @@ -5046,7 +5046,7 @@ (set_attr "ssememalign" "64") (set_attr "mode" "V2DF")]) -(define_insn "avx512f_cvtpd2dq512" +(define_insn "avx512f_cvtpd2dq512" [(set (match_operand:V8SI 0 "register_operand" "=v") (unspec:V8SI [(match_operand:V8DF 1 "" "")] @@ -6006,6 +6006,23 @@ DONE; }) +(define_expand "avx512f_vec_pack_sfix_v8df" + [(match_operand:V16SI 0 "register_operand") + (match_operand:V8DF 1 "nonimmediate_operand") + (match_operand:V8DF 2 "nonimmediate_operand")] + "TARGET_AVX512F" +{ + rtx r1, r2; + + r1 = gen_reg_rtx (V8SImode); + r2 = gen_reg_rtx (V8SImode); + + emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1])); + emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2])); + emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2)); + DONE; +}) + (define_expand "vec_pack_sfix_v4df" [(match_operand:V8SI 0 "register_operand") (match_operand:V4DF 1 "nonimmediate_operand") @@ -10566,22 +10583,23 @@ (set_attr "mode" "")]) (define_insn "3" - [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v") (any_lshift:VI48_AVX2 - (match_operand:VI48_AVX2 1 "register_operand" "0,v") - (match_operand:SI 2 "nonmemory_operand" "xN,vN")))] + (match_operand:VI48_AVX2 1 "register_operand" "0,x,v") + (match_operand:SI 2 "nonmemory_operand" "xN,xN,vN")))] "TARGET_SSE2 && " "@ p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + vp\t{%2, %1, %0|%0, %1, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,vex,evex") 
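
[Editor's note] The new avx512f_vec_pack_sfix_v8df expander in the sse.md hunk above builds a V16SI result from two V8DF inputs by converting each half to eight 32-bit integers and concatenating. An intrinsics-level sketch of the same dataflow (an illustration under AVX512F semantics, not the expander's RTL):

    #include <immintrin.h>

    __attribute__ ((target ("avx512f")))
    __m512i
    pack_sfix_v8df (__m512d a, __m512d b)
    {
      __m256i lo = _mm512_cvtpd_epi32 (a);  /* vcvtpd2dq on the first input  */
      __m256i hi = _mm512_cvtpd_epi32 (b);  /* vcvtpd2dq on the second input */
      /* Concatenate the halves, mirroring gen_avx_vec_concatv16si.  */
      return _mm512_inserti64x4 (_mm512_castsi256_si512 (lo), hi, 1);
    }
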
(set_attr "mode" "")]) (define_insn "3" @@ -15246,13 +15264,25 @@ DONE; }) -(define_expand "avx512f_roundpd512" - [(match_operand:V8DF 0 "register_operand") - (match_operand:V8DF 1 "nonimmediate_operand") +(define_expand "avx512f_round512" + [(match_operand:VF_512 0 "register_operand") + (match_operand:VF_512 1 "nonimmediate_operand") (match_operand:SI 2 "const_0_to_15_operand")] "TARGET_AVX512F" { - emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2])); + emit_insn (gen_avx512f_rndscale (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "avx512f_roundps512_sfix" + [(match_operand:V16SI 0 "register_operand") + (match_operand:V16SF 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_15_operand")] + "TARGET_AVX512F" +{ + rtx tmp = gen_reg_rtx (V16SFmode); + emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2])); + emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp)); DONE; }) @@ -15352,7 +15382,7 @@ (define_expand "round2_sfix" [(match_operand: 0 "register_operand") - (match_operand:VF1_128_256 1 "register_operand")] + (match_operand:VF1 1 "register_operand")] "TARGET_ROUND && !flag_trapping_math" { rtx tmp = gen_reg_rtx (mode); diff --git a/gcc/config/microblaze/rtems.h b/gcc/config/microblaze/rtems.h index 68aa381b37..56f3f7087d 100644 --- a/gcc/config/microblaze/rtems.h +++ b/gcc/config/microblaze/rtems.h @@ -23,3 +23,10 @@ along with GCC; see the file COPYING3. If not see builtin_define( "__rtems__" ); \ builtin_assert( "system=rtems" ); \ } while (0) + +/* Redefine to include only items relevant for RTEMS */ +#undef LINK_SPEC +#define LINK_SPEC "%{shared:-shared} -N -relax \ + %{mbig-endian:-EB --oformat=elf32-microblaze} \ + %{mlittle-endian:-EL --oformat=elf32-microblazeel} \ + %{mxl-gp-opt:%{G*}} %{!mxl-gp-opt: -G 0}" diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index b088cf89bb..a6c90b633f 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -483,7 +483,7 @@ nvptx_strict_argument_naming (cumulative_args_t cum_v) static rtx nvptx_libcall_value (machine_mode mode, const_rtx) { - if (!cfun->machine->doing_call) + if (!cfun || !cfun->machine->doing_call) /* Pretend to return in a hard reg for early uses before pseudos can be generated. */ return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM); @@ -502,6 +502,7 @@ nvptx_function_value (const_tree type, const_tree ARG_UNUSED (func), if (outgoing) { + gcc_assert (cfun); cfun->machine->return_mode = mode; return gen_rtx_REG (mode, NVPTX_RETURN_REGNUM); } diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 8b1c8327c7..5fb1da78a3 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -4541,63 +4541,78 @@ hppa_profile_hook (int label_no) lcla2 and load_offset_label_address insn patterns. */ rtx reg = gen_reg_rtx (SImode); rtx_code_label *label_rtx = gen_label_rtx (); - rtx begin_label_rtx; + rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount")); + int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE); + rtx arg_bytes, begin_label_rtx; rtx_insn *call_insn; char begin_label_name[16]; + bool use_mcount_pcrel_call; + + /* If we can reach _mcount with a pc-relative call, we can optimize + loading the address of the current function. This requires linker + long branch stub support. 
*/ + if (!TARGET_PORTABLE_RUNTIME + && !TARGET_LONG_CALLS + && (TARGET_SOM || flag_function_sections)) + use_mcount_pcrel_call = TRUE; + else + use_mcount_pcrel_call = FALSE; ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL, label_no); begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name)); - if (TARGET_64BIT) - emit_move_insn (arg_pointer_rtx, - gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, - GEN_INT (64))); - emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2)); - /* The address of the function is loaded into %r25 with an instruction- - relative sequence that avoids the use of relocations. The sequence - is split so that the load_offset_label_address instruction can - occupy the delay slot of the call to _mcount. */ - if (TARGET_PA_20) - emit_insn (gen_lcla2 (reg, label_rtx)); - else - emit_insn (gen_lcla1 (reg, label_rtx)); - - emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), - reg, begin_label_rtx, label_rtx)); - -#if !NO_DEFERRED_PROFILE_COUNTERS - { - rtx count_label_rtx, addr, r24; - char count_label_name[16]; + if (!use_mcount_pcrel_call) + { + /* The address of the function is loaded into %r25 with an instruction- + relative sequence that avoids the use of relocations. The sequence + is split so that the load_offset_label_address instruction can + occupy the delay slot of the call to _mcount. */ + if (TARGET_PA_20) + emit_insn (gen_lcla2 (reg, label_rtx)); + else + emit_insn (gen_lcla1 (reg, label_rtx)); - funcdef_nos.safe_push (label_no); - ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); - count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name)); + emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25), + reg, + begin_label_rtx, + label_rtx)); + } - addr = force_reg (Pmode, count_label_rtx); - r24 = gen_rtx_REG (Pmode, 24); - emit_move_insn (r24, addr); + if (!NO_DEFERRED_PROFILE_COUNTERS) + { + rtx count_label_rtx, addr, r24; + char count_label_name[16]; - call_insn = - emit_call_insn (gen_call (gen_rtx_MEM (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - "_mcount")), - GEN_INT (TARGET_64BIT ? 24 : 12))); + funcdef_nos.safe_push (label_no); + ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no); + count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, + ggc_strdup (count_label_name)); - use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); - } -#else + addr = force_reg (Pmode, count_label_rtx); + r24 = gen_rtx_REG (Pmode, 24); + emit_move_insn (r24, addr); - call_insn = - emit_call_insn (gen_call (gen_rtx_MEM (Pmode, - gen_rtx_SYMBOL_REF (Pmode, - "_mcount")), - GEN_INT (TARGET_64BIT ? 16 : 8))); + arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12); + if (use_mcount_pcrel_call) + call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, + begin_label_rtx)); + else + call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); -#endif + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24); + } + else + { + arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8); + if (use_mcount_pcrel_call) + call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes, + begin_label_rtx)); + else + call_insn = emit_call_insn (gen_call (mcount, arg_bytes)); + } use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25)); use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26)); @@ -4605,6 +4620,10 @@ hppa_profile_hook (int label_no) /* Indicate the _mcount call cannot throw, nor will it execute a non-local goto. 
*/ make_reg_eh_region_note_nothrow_nononlocal (call_insn); + + /* Allocate space for fixed arguments. */ + if (reg_parm_stack_space > crtl->outgoing_args_size) + crtl->outgoing_args_size = reg_parm_stack_space; } /* Fetch the return address for the frame COUNT steps up from diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 75100d2d69..2c52465ca8 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -8216,6 +8216,170 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)] (symbol_ref "pa_attr_length_indirect_call (insn)")))]) +/* Expand special pc-relative call to _mcount. */ + +(define_expand "call_mcount" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2))])] + "!TARGET_PORTABLE_RUNTIME" + " +{ + rtx op = XEXP (operands[0], 0); + rtx nb = operands[1]; + rtx lab = operands[2]; + + if (TARGET_64BIT) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_move_insn (arg_pointer_rtx, + gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx, + GEN_INT (64))); + emit_call_insn (gen_call_mcount_64bit (op, nb, lab, r4)); + } + else + { + if (flag_pic) + { + rtx r4 = gen_rtx_REG (word_mode, 4); + emit_call_insn (gen_call_mcount_pic (op, nb, lab, r4)); + } + else + emit_call_insn (gen_call_mcount_nonpic (op, nb, lab)); + } + + DONE; +}") + +(define_insn "call_mcount_nonpic" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + pa_output_arg_descriptor (insn); + return \"{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "call_mcount_pic" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (clobber (match_operand 3)) + (use (reg:SI 19))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "#") + +(define_split + [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (clobber (match_operand 3)) + (use (reg:SI 19))])] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:SI 19)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_dup 2) + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (use (reg:SI 19))]) + (set (reg:SI 19) (match_dup 3))] + "") + +(define_insn "*call_mcount_pic_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:SI 2)) + (use (reg:SI 19))] + "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT" + "* +{ + pa_output_arg_descriptor (insn); + return \"{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "call_mcount_64bit" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 
"" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (clobber (match_operand 3)) + (use (reg:DI 27)) + (use (reg:DI 29))] + "TARGET_64BIT" + "#") + +(define_split + [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (clobber (match_operand 3)) + (use (reg:DI 27)) + (use (reg:DI 29))])] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (reg:DI 27)) + (parallel [(call (mem:SI (match_dup 0)) + (match_dup 1)) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_dup 2) + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29))]) + (set (reg:DI 27) (match_dup 3))] + "") + +(define_insn "*call_mcount_64bit_post_reload" + [(call (mem:SI (match_operand 0 "call_operand_address" "")) + (match_operand 1 "" "i")) + (set (reg:SI 25) + (plus:SI (reg:SI 2) + (minus:SI (match_operand 2 "" "") + (plus:SI (pc) (const_int 4))))) + (clobber (reg:DI 2)) + (use (reg:DI 27)) + (use (reg:DI 29))] + "TARGET_64BIT" + "{bl|b,l} %0,%%r2\;ldo %2-.-4(%%r2),%%r25" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + ;; Call subroutine returning any type. (define_expand "untyped_call" diff --git a/gcc/config/rl78/rl78-expand.md b/gcc/config/rl78/rl78-expand.md index 331eec1e90..4fd195865a 100644 --- a/gcc/config/rl78/rl78-expand.md +++ b/gcc/config/rl78/rl78-expand.md @@ -159,7 +159,7 @@ [(set (match_operand:HI 0 "register_operand") (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand")) (zero_extend:HI (match_operand:QI 2 "register_operand"))))] - "!TARGET_G10" + "" "" ) diff --git a/gcc/config/rl78/rl78-real.md b/gcc/config/rl78/rl78-real.md index aacaefff9e..530b2fe90f 100644 --- a/gcc/config/rl78/rl78-real.md +++ b/gcc/config/rl78/rl78-real.md @@ -179,7 +179,7 @@ [(set (match_operand:HI 0 "register_operand" "=A,A") (mult:HI (match_operand:HI 1 "rl78_nonfar_operand" "0,0") (match_operand:HI 2 "rl78_24_operand" "N,i")))] - "rl78_real_insns_ok () && !TARGET_G10" + "rl78_real_insns_ok ()" "@ shlw\t%0, 1 shlw\t%0, 2" @@ -189,7 +189,7 @@ [(set (match_operand:HI 0 "nonimmediate_operand" "=A") (mult:HI (zero_extend:HI (match_operand:QI 1 "general_operand" "%a")) (zero_extend:HI (match_operand:QI 2 "general_operand" "x"))))] - "rl78_real_insns_ok () && !TARGET_G10" + "rl78_real_insns_ok ()" "mulu\t%2" ) diff --git a/gcc/config/rl78/rl78-virt.md b/gcc/config/rl78/rl78-virt.md index e2e7f4750e..8d1b2a8731 100644 --- a/gcc/config/rl78/rl78-virt.md +++ b/gcc/config/rl78/rl78-virt.md @@ -116,7 +116,7 @@ [(set (match_operand:HI 0 "register_operand" "=v") (mult:HI (match_operand:HI 1 "rl78_nonfar_operand" "%vim") (match_operand:HI 2 "rl78_24_operand" "Ni")))] - "rl78_virt_insns_ok () && !TARGET_G10" + "rl78_virt_insns_ok ()" "v.mulu\t%0, %1, %2" [(set_attr "valloc" "umul")] ) @@ -125,7 +125,7 @@ [(set (match_operand:HI 0 "register_operand" "=v") (mult:HI (zero_extend:HI (match_operand:QI 1 "rl78_nonfar_operand" "%vim")) (zero_extend:HI (match_operand:QI 2 "general_operand" "vim"))))] - "rl78_virt_insns_ok () && !TARGET_G10" + "rl78_virt_insns_ok ()" "v.mulu\t%0, %2" [(set_attr "valloc" "umul")] ) diff --git a/gcc/config/rs6000/40x.md b/gcc/config/rs6000/40x.md index 91e5cffaa3..98d9ae02ba 100644 --- a/gcc/config/rs6000/40x.md +++ b/gcc/config/rs6000/40x.md @@ -119,6 +119,6 @@ "bpu_40x") 
(define_insn_reservation "ppc405-float" 11 - (and (eq_attr "type" "fpload,fpstore,fpcompare,fp,dmul,sdiv,ddiv") + (and (eq_attr "type" "fpload,fpstore,fpcompare,fp,fpsimple,dmul,sdiv,ddiv") (eq_attr "cpu" "ppc405")) "fpu_405*10") diff --git a/gcc/config/rs6000/440.md b/gcc/config/rs6000/440.md index 6d07ef3ea3..c33f4accb0 100644 --- a/gcc/config/rs6000/440.md +++ b/gcc/config/rs6000/440.md @@ -107,7 +107,7 @@ "ppc440_issue,ppc440_f_pipe+ppc440_i_pipe") (define_insn_reservation "ppc440-fp" 5 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "ppc440")) "ppc440_issue,ppc440_f_pipe") diff --git a/gcc/config/rs6000/476.md b/gcc/config/rs6000/476.md index 8c266b992d..4cae8fcc9e 100644 --- a/gcc/config/rs6000/476.md +++ b/gcc/config/rs6000/476.md @@ -124,7 +124,7 @@ ppc476_f_pipe+ppc476_i_pipe") (define_insn_reservation "ppc476-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "ppc476")) "ppc476_issue_fp,\ ppc476_f_pipe") diff --git a/gcc/config/rs6000/601.md b/gcc/config/rs6000/601.md index e34c9bf20f..aa869d86d8 100644 --- a/gcc/config/rs6000/601.md +++ b/gcc/config/rs6000/601.md @@ -86,7 +86,7 @@ "(fpu_ppc601+iu_ppc601*2),nothing*2,bpu_ppc601") (define_insn_reservation "ppc601-fp" 4 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppc601")) "fpu_ppc601") diff --git a/gcc/config/rs6000/603.md b/gcc/config/rs6000/603.md index 3b07461bf0..052c1c1c95 100644 --- a/gcc/config/rs6000/603.md +++ b/gcc/config/rs6000/603.md @@ -105,7 +105,7 @@ "(fpu_603+iu_603*2),bpu_603") (define_insn_reservation "ppc603-fp" 3 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppc603")) "fpu_603") diff --git a/gcc/config/rs6000/6xx.md b/gcc/config/rs6000/6xx.md index 29893aeeef..3ab80a2b26 100644 --- a/gcc/config/rs6000/6xx.md +++ b/gcc/config/rs6000/6xx.md @@ -160,7 +160,7 @@ "fpu_6xx") (define_insn_reservation "ppc604-fp" 3 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppc604,ppc604e,ppc620")) "fpu_6xx") diff --git a/gcc/config/rs6000/7450.md b/gcc/config/rs6000/7450.md index 8146369399..0ebf6fa0cd 100644 --- a/gcc/config/rs6000/7450.md +++ b/gcc/config/rs6000/7450.md @@ -120,7 +120,7 @@ "ppc7450_du,fpu_7450") (define_insn_reservation "ppc7450-fp" 5 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "ppc7450")) "ppc7450_du,fpu_7450") @@ -162,7 +162,7 @@ ;; Altivec (define_insn_reservation "ppc7450-vecsimple" 1 - (and (eq_attr "type" "vecsimple") + (and (eq_attr "type" "vecsimple,veclogical,vecmove") (eq_attr "cpu" "ppc7450")) "ppc7450_du,ppc7450_vec_du,vecsmpl_7450") @@ -172,7 +172,7 @@ "ppc7450_du,ppc7450_vec_du,veccmplx_7450") (define_insn_reservation "ppc7450-veccmp" 2 - (and (eq_attr "type" "veccmp") + (and (eq_attr "type" "veccmp,veccmpfx") (eq_attr "cpu" "ppc7450")) "ppc7450_du,ppc7450_vec_du,veccmplx_7450") diff --git a/gcc/config/rs6000/7xx.md b/gcc/config/rs6000/7xx.md index 1da48b77fd..70e2eb17f1 100644 --- a/gcc/config/rs6000/7xx.md +++ b/gcc/config/rs6000/7xx.md @@ -113,7 +113,7 @@ "ppc750_du,fpu_7xx") (define_insn_reservation "ppc750-fp" 3 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppc750,ppc7400")) "ppc750_du,fpu_7xx") @@ -165,7 +165,7 @@ ;; Altivec (define_insn_reservation "ppc7400-vecsimple" 1 - (and (eq_attr "type" "vecsimple,veccmp") + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") (eq_attr "cpu" "ppc7400")) 
"ppc750_du,ppc7400_vec_du,veccmplx_7xx") diff --git a/gcc/config/rs6000/8540.md b/gcc/config/rs6000/8540.md index ae4e45f89b..f39f1f6751 100644 --- a/gcc/config/rs6000/8540.md +++ b/gcc/config/rs6000/8540.md @@ -190,7 +190,7 @@ ;; Simple vector (define_insn_reservation "ppc8540_simple_vector" 1 - (and (eq_attr "type" "vecsimple") + (and (eq_attr "type" "vecsimple,veclogical,vecmove") (eq_attr "cpu" "ppc8540,ppc8548")) "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") @@ -202,7 +202,7 @@ ;; Vector compare (define_insn_reservation "ppc8540_vector_compare" 1 - (and (eq_attr "type" "veccmp") + (and (eq_attr "type" "veccmp,veccmpfx") (eq_attr "cpu" "ppc8540,ppc8548")) "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire") diff --git a/gcc/config/rs6000/a2.md b/gcc/config/rs6000/a2.md index 1fcf1cfb20..e0b800ce61 100644 --- a/gcc/config/rs6000/a2.md +++ b/gcc/config/rs6000/a2.md @@ -81,7 +81,7 @@ ;; D.8.1 (define_insn_reservation "ppca2-fp" 6 - (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only). + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppca2")) "axu") diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index ea6af8d192..f77d446646 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -327,8 +327,8 @@ #define vec_sqrt __builtin_vec_sqrt #define vec_vsx_ld __builtin_vec_vsx_ld #define vec_vsx_st __builtin_vec_vsx_st -#define vec_xl __builtin_vec_vsx_ld -#define vec_xst __builtin_vec_vsx_st +#define vec_xl __builtin_vec_xl +#define vec_xst __builtin_vec_xst /* Note, xxsldi and xxpermdi were added as __builtin_vsx_ functions instead of __builtin_vec_ */ @@ -384,6 +384,31 @@ #define vec_vupklsw __builtin_vec_vupklsw #endif +#ifdef _ARCH_PWR9 +/* Vector additions added in ISA 3.0. */ +#define vec_vctz __builtin_vec_vctz +#define vec_cntlz __builtin_vec_vctz +#define vec_vctzb __builtin_vec_vctzb +#define vec_vctzd __builtin_vec_vctzd +#define vec_vctzh __builtin_vec_vctzh +#define vec_vctzw __builtin_vec_vctzw +#define vec_vprtyb __builtin_vec_vprtyb +#define vec_vprtybd __builtin_vec_vprtybd +#define vec_vprtybw __builtin_vec_vprtybw + +#ifdef _ARCH_PPC64 +#define vec_vprtybq __builtin_vec_vprtybq +#endif + +#define vec_slv __builtin_vec_vslv +#define vec_srv __builtin_vec_vsrv + +#define vec_absd __builtin_vec_vadu +#define vec_absdb __builtin_vec_vadub +#define vec_absdh __builtin_vec_vaduh +#define vec_absdw __builtin_vec_vaduw +#endif + /* Predicates. For C++, we use templates in order to allow non-parenthesized arguments. 
For C, instead, we use macros since non-parenthesized arguments were diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 9c3084dcb8..362fa221c1 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -58,6 +58,7 @@ UNSPEC_VSUM2SWS UNSPEC_VSUMSWS UNSPEC_VPERM + UNSPEC_VPERMR UNSPEC_VPERM_UNS UNSPEC_VRFIN UNSPEC_VCFUX @@ -73,6 +74,9 @@ UNSPEC_VUNPACK_LO_SIGN_DIRECT UNSPEC_VUPKHPX UNSPEC_VUPKLPX + UNSPEC_DARN + UNSPEC_DARN_32 + UNSPEC_DARN_RAW UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -110,6 +114,9 @@ UNSPEC_STVLXL UNSPEC_STVRX UNSPEC_STVRXL + UNSPEC_VSLV + UNSPEC_VSRV + UNSPEC_VADU UNSPEC_VMULWHUB UNSPEC_VMULWLUB UNSPEC_VMULWHSB @@ -189,6 +196,13 @@ (KF "FLOAT128_VECTOR_P (KFmode)") (TF "FLOAT128_VECTOR_P (TFmode)")]) +;; Specific iterator for parity which does not have a byte/half-word form, but +;; does have a quad word form +(define_mode_iterator VParity [V4SI + V2DI + V1TI + (TI "TARGET_VSX_TIMODE")]) + (define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) (define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") @@ -203,6 +217,9 @@ (define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")]) (define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")]) +;; Vector negate +(define_mode_iterator VNEG [V4SI V2DI]) + ;; Vector move instructions. (define_insn "*altivec_mov" [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v,*r") @@ -225,7 +242,7 @@ default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*,*") + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*,*") (set_attr "length" "4,4,4,20,20,20,4,8,32")]) ;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode @@ -251,7 +268,7 @@ default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) + [(set_attr "type" "vecstore,vecload,veclogical,store,load,*,veclogical,*")]) ;; Load up a vector with the most significant bit set by loading up -1 and ;; doing a shift left @@ -586,7 +603,7 @@ (match_operand:VI2 2 "altivec_register_operand" "v")))] "" "vcmpequ %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_gt" [(set (match_operand:VI2 0 "altivec_register_operand" "=v") @@ -594,7 +611,7 @@ (match_operand:VI2 2 "altivec_register_operand" "v")))] "" "vcmpgts %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_gtu" [(set (match_operand:VI2 0 "altivec_register_operand" "=v") @@ -602,7 +619,7 @@ (match_operand:VI2 2 "altivec_register_operand" "v")))] "" "vcmpgtu %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_eqv4sf" [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") @@ -637,7 +654,7 @@ (match_operand:VM 3 "altivec_register_operand" "v")))] "VECTOR_MEM_ALTIVEC_P (mode)" "vsel %0,%3,%2,%1" - [(set_attr "type" "vecperm")]) + [(set_attr "type" "vecmove")]) (define_insn "*altivec_vsel_uns" [(set (match_operand:VM 0 "altivec_register_operand" "=v") @@ -648,7 +665,7 @@ (match_operand:VM 3 "altivec_register_operand" "v")))] "VECTOR_MEM_ALTIVEC_P (mode)" "vsel %0,%3,%2,%1" - [(set_attr "type" "vecperm")]) + [(set_attr "type" "vecmove")]) ;; Fused multiply add. 
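The altivec.h hunk above only wires the new ISA 3.0 names to built-ins; the altivec.md hunks around here supply the matching patterns (UNSPEC_VSLV/UNSPEC_VSRV behind vec_slv/vec_srv, UNSPEC_VADU behind vec_absd, and so on). A minimal usage sketch, assuming a build where these built-ins are enabled (e.g. -mcpu=power9); the function name is illustrative:

    #include <altivec.h>

    /* Sketch only: vec_absd maps to __builtin_vec_vadu (see the altivec.h
       hunk above), which expands to the new "vector absolute difference
       unsigned" vabsdu[bhw] patterns guarded by TARGET_P9_VECTOR.  */
    vector unsigned char
    byte_distance (vector unsigned char a, vector unsigned char b)
    {
      return vec_absd (a, b);   /* per element: unsigned |a[i] - b[i]| */
    }
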
@@ -1617,6 +1634,24 @@ "vslo %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "vslv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSLV))] + "TARGET_P9_VECTOR" + "vslv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +(define_insn "vsrv" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")] + UNSPEC_VSRV))] + "TARGET_P9_VECTOR" + "vsrv %0,%1,%2" + [(set_attr "type" "vecsimple")]) + (define_insn "*altivec_vsl" [(set (match_operand:VI2 0 "register_operand" "=v") (ashift:VI2 (match_operand:VI2 1 "register_operand" "v") @@ -1949,32 +1984,30 @@ ;; Slightly prefer vperm, since the target does not overlap the source (define_insn "*altivec_vperm__internal" - [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") - (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") - (match_operand:VM 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x1,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_insn "altivec_vperm_v8hiv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo") - (match_operand:V8HI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V16QI 0 "register_operand" "=v,?wo") + (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x1,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_expand "altivec_vperm__uns" [(set (match_operand:VM 0 "register_operand" "") @@ -1992,18 +2025,17 @@ }) (define_insn "*altivec_vperm__uns_internal" - [(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo") - (unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo") - (match_operand:VM 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x1,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_expand "vec_permv16qi" [(set (match_operand:V16QI 0 "register_operand" "") @@ -2032,6 +2064,19 @@ FAIL; }) +(define_insn "*altivec_vpermr__internal" + [(set (match_operand:VM 0 "register_operand" "=v,?wo") + (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") + (match_operand:VM 2 "register_operand" "v,0") + 
(match_operand:V16QI 3 "register_operand" "v,wo")] + UNSPEC_VPERMR))] + "TARGET_P9_VECTOR" + "@ + vpermr %0,%2,%1,%3 + xxpermr %x0,%x1,%x3" + [(set_attr "type" "vecperm") + (set_attr "length" "4")]) + (define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] @@ -2238,7 +2283,7 @@ (match_dup 2)))] "" "vcmpequ. %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpgts_p" [(set (reg:CC 74) @@ -2250,7 +2295,7 @@ (match_dup 2)))] "" "vcmpgts. %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpgtu_p" [(set (reg:CC 74) @@ -2262,7 +2307,7 @@ (match_dup 2)))] "" "vcmpgtu. %0,%1,%2" - [(set_attr "type" "veccmp")]) + [(set_attr "type" "veccmpfx")]) (define_insn "*altivec_vcmpeqfp_p" [(set (reg:CC 74) @@ -2690,20 +2735,28 @@ DONE; }) +(define_insn "*p9_neg2" + [(set (match_operand:VNEG 0 "altivec_register_operand" "=v") + (neg:VNEG (match_operand:VNEG 1 "altivec_register_operand" "v")))] + "TARGET_P9_VECTOR" + "vneg %0,%1" + [(set_attr "type" "vecsimple")]) + (define_expand "neg2" - [(use (match_operand:VI 0 "register_operand" "")) - (use (match_operand:VI 1 "register_operand" ""))] - "TARGET_ALTIVEC" - " + [(set (match_operand:VI2 0 "register_operand" "") + (neg:VI2 (match_operand:VI2 1 "register_operand" "")))] + "" { - rtx vzero; + if (!TARGET_P9_VECTOR || (mode != V4SImode && mode != V2DImode)) + { + rtx vzero; - vzero = gen_reg_rtx (GET_MODE (operands[0])); - emit_insn (gen_altivec_vspltis (vzero, const0_rtx)); - emit_insn (gen_sub3 (operands[0], vzero, operands[1])); - - DONE; -}") + vzero = gen_reg_rtx (GET_MODE (operands[0])); + emit_move_insn (vzero, CONST0_RTX (mode)); + emit_insn (gen_sub3 (operands[0], vzero, operands[1])); + DONE; + } +}) (define_expand "udot_prod" [(set (match_operand:V4SI 0 "register_operand" "=v") @@ -2791,32 +2844,30 @@ "") (define_insn "vperm_v8hiv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo") - (match_operand:V4SI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V4SI 0 "register_operand" "=v,?wo") + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,wo") + (match_operand:V4SI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMSI))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x1,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_insn "vperm_v16qiv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo") - (match_operand:V8HI 2 "register_operand" "v,wo,wo") - (match_operand:V16QI 3 "register_operand" "v,wo,wo")] + [(set (match_operand:V8HI 0 "register_operand" "=v,?wo") + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,wo") + (match_operand:V8HI 2 "register_operand" "v,0") + (match_operand:V16QI 3 "register_operand" "v,wo")] UNSPEC_VPERMHI))] "TARGET_ALTIVEC" "@ vperm %0,%1,%2,%3 - xxperm %x0,%x2,%x3 - xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3" + xxperm %x0,%x1,%x3" [(set_attr "type" "vecperm") - (set_attr "length" "4,4,8")]) + (set_attr "length" "4")]) (define_expand "vec_unpacku_hi_v16qi" @@ -3353,7 +3404,7 @@ }") -;; Power8 vector 
instructions encoded as Altivec instructions +;; Power8/power9 vector instructions encoded as Altivec instructions ;; Vector count leading zeros (define_insn "*p8v_clz2" @@ -3364,6 +3415,33 @@ [(set_attr "length" "4") (set_attr "type" "vecsimple")]) +;; Vector absolute difference unsigned +(define_expand "vadu3" + [(set (match_operand:VI 0 "register_operand") + (unspec:VI [(match_operand:VI 1 "register_operand") + (match_operand:VI 2 "register_operand")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR") + +;; Vector absolute difference unsigned +(define_insn "*p9_vadu3" + [(set (match_operand:VI 0 "register_operand" "=v") + (unspec:VI [(match_operand:VI 1 "register_operand" "v") + (match_operand:VI 2 "register_operand" "v")] + UNSPEC_VADU))] + "TARGET_P9_VECTOR" + "vabsdu %0,%1,%2" + [(set_attr "type" "vecsimple")]) + +;; Vector count trailing zeros +(define_insn "*p9v_ctz2" + [(set (match_operand:VI2 0 "register_operand" "=v") + (ctz:VI2 (match_operand:VI2 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vctz %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + ;; Vector population count (define_insn "*p8v_popcount2" [(set (match_operand:VI2 0 "register_operand" "=v") @@ -3373,6 +3451,15 @@ [(set_attr "length" "4") (set_attr "type" "vecsimple")]) +;; Vector parity +(define_insn "*p9v_parity2" + [(set (match_operand:VParity 0 "register_operand" "=v") + (parity:VParity (match_operand:VParity 1 "register_operand" "v")))] + "TARGET_P9_VECTOR" + "vprtyb %0,%1" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + ;; Vector Gather Bits by Bytes by Doubleword (define_insn "p8v_vgbbd" [(set (match_operand:V16QI 0 "register_operand" "=v") @@ -3540,6 +3627,27 @@ [(set_attr "length" "4") (set_attr "type" "vecsimple")]) +(define_insn "darn_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_DARN_32))] + "TARGET_P9_MISC" + "darn %0,0" + [(set_attr "type" "integer")]) + +(define_insn "darn_raw" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,2" + [(set_attr "type" "integer")]) + +(define_insn "darn" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_DARN))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,1" + [(set_attr "type" "integer")]) + (define_expand "bcd_" [(parallel [(set (reg:CCFP 74) (compare:CCFP diff --git a/gcc/config/rs6000/cell.md b/gcc/config/rs6000/cell.md index b780f09efe..7eee77cd5f 100644 --- a/gcc/config/rs6000/cell.md +++ b/gcc/config/rs6000/cell.md @@ -306,7 +306,7 @@ ; Basic FP latency is 10 cycles, thoughput is 1/cycle (define_insn_reservation "cell-fp" 10 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "cell")) "slot01,vsu1_cell,vsu1_cell*8") @@ -329,7 +329,7 @@ ; VMX (define_insn_reservation "cell-vecsimple" 4 - (and (eq_attr "type" "vecsimple") + (and (eq_attr "type" "vecsimple,veclogical,vecmove") (eq_attr "cpu" "cell")) "slot01,vsu1_cell,vsu1_cell*2") @@ -341,7 +341,7 @@ ;; TODO: add support for recording instructions (define_insn_reservation "cell-veccmp" 4 - (and (eq_attr "type" "veccmp") + (and (eq_attr "type" "veccmp,veccmpfx") (eq_attr "cpu" "cell")) "slot01,vsu1_cell,vsu1_cell*2") diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index ea15764e51..465ad6d921 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -140,6 +140,10 @@ (and (match_code "const_int") 
(match_test "TARGET_VSX && (ival == VECTOR_ELEMENT_SCALAR_64BIT)"))) +(define_constraint "wE" + "Vector constant that can be loaded with the XXSPLTIB instruction." + (match_test "xxspltib_constant_nosplit (op, mode)")) + ;; Extended fusion store (define_memory_constraint "wF" "Memory operand suitable for power9 fusion load/stores" @@ -156,11 +160,35 @@ (and (match_test "TARGET_DIRECT_MOVE_128") (match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)")))) +;; Generate the XXORC instruction to set a register to all 1's +(define_constraint "wM" + "Match vector constant with all 1's if the XXLORC instruction is available" + (and (match_test "TARGET_P8_VECTOR") + (match_operand 0 "all_ones_constant"))) + +;; ISA 3.0 vector d-form addresses +(define_memory_constraint "wO" + "Memory operand suitable for the ISA 3.0 vector d-form instructions." + (match_operand 0 "vsx_quad_dform_memory_operand")) + ;; Lq/stq validates the address for load/store quad (define_memory_constraint "wQ" "Memory operand suitable for the load/store quad instructions" (match_operand 0 "quad_memory_operand")) +(define_constraint "wS" + "Vector constant that can be loaded with XXSPLTIB & sign extension." + (match_test "xxspltib_constant_split (op, mode)")) + +;; ISA 3.0 DS-form instruction that has the bottom 2 bits 0 and no update form. +;; Used by LXSD/STXSD/LXSSP/STXSSP. In contrast to "Y", the multiple-of-four +;; offset is enforced for 32-bit too. +(define_memory_constraint "wY" + "Offsettable memory operand, with bottom 2 bits 0" + (and (match_code "mem") + (not (match_test "update_address_mem (op, mode)")) + (match_test "mem_operand_ds_form (op, mode)"))) + ;; Altivec style load/store that ignores the bottom bits of the address (define_memory_constraint "wZ" "Indexed or indirect memory operand, ignoring the bottom 4 bits" diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md index 5957abb8f5..83a26aef36 100644 --- a/gcc/config/rs6000/crypto.md +++ b/gcc/config/rs6000/crypto.md @@ -107,4 +107,4 @@ UNSPEC_VSHASIGMA))] "TARGET_CRYPTO" "vshasigma %0,%1,%2,%3" - [(set_attr "type" "crypto")]) + [(set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index a631ff5fd9..e6ed70ed8e 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -58,7 +58,7 @@ (float_extend:DD (match_operand:SD 1 "gpc_reg_operand" "f")))] "TARGET_DFP" "dctdp %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_expand "extendsdtd2" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -76,7 +76,7 @@ (float_truncate:SD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "drsp %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_expand "negdd2" [(set (match_operand:DD 0 "gpc_reg_operand" "") @@ -89,7 +89,7 @@ (neg:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" "fneg %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpsimple")]) (define_expand "absdd2" [(set (match_operand:DD 0 "gpc_reg_operand" "") @@ -102,14 +102,14 @@ (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_HARD_FLOAT && TARGET_FPRS" "fabs %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpsimple")]) (define_insn "*nabsdd2_fpr" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") (neg:DD (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d"))))] "TARGET_HARD_FLOAT && TARGET_FPRS" "fnabs %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpsimple")]) (define_expand "negtd2" [(set (match_operand:TD 0 "gpc_reg_operand" "") @@ 
-124,7 +124,7 @@ "@ fneg %0,%1 fneg %0,%1\;fmr %L0,%L1" - [(set_attr "type" "fp") + [(set_attr "type" "fpsimple") (set_attr "length" "4,8")]) (define_expand "abstd2" @@ -140,7 +140,7 @@ "@ fabs %0,%1 fabs %0,%1\;fmr %L0,%L1" - [(set_attr "type" "fp") + [(set_attr "type" "fpsimple") (set_attr "length" "4,8")]) (define_insn "*nabstd2_fpr" @@ -150,7 +150,7 @@ "@ fnabs %0,%1 fnabs %0,%1\;fmr %L0,%L1" - [(set_attr "type" "fp") + [(set_attr "type" "fpsimple") (set_attr "length" "4,8")]) ;; Hardware support for decimal floating point operations. @@ -160,7 +160,7 @@ (float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dctqpq %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; The result of drdpq is an even/odd register pair with the converted ;; value in the even register and zero in the odd register. @@ -173,7 +173,7 @@ (clobber (match_scratch:TD 2 "=d"))] "TARGET_DFP" "drdpq %2,%1\;fmr %0,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "adddd3" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") @@ -181,7 +181,7 @@ (match_operand:DD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dadd %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "addtd3" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -189,7 +189,7 @@ (match_operand:TD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "daddq %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "subdd3" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") @@ -197,7 +197,7 @@ (match_operand:DD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dsub %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "subtd3" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -205,7 +205,7 @@ (match_operand:TD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dsubq %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "muldd3" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") @@ -213,7 +213,7 @@ (match_operand:DD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dmul %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "multd3" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -221,7 +221,7 @@ (match_operand:TD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dmulq %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "divdd3" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") @@ -229,7 +229,7 @@ (match_operand:DD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "ddiv %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "divtd3" [(set (match_operand:TD 0 "gpc_reg_operand" "=d") @@ -237,7 +237,7 @@ (match_operand:TD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "ddivq %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "*cmpdd_internal1" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") @@ -245,7 +245,7 @@ (match_operand:DD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dcmpu %0,%1,%2" - [(set_attr "type" "fpcompare")]) + [(set_attr "type" "dfp")]) (define_insn "*cmptd_internal1" [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") @@ -253,21 +253,21 @@ (match_operand:TD 2 "gpc_reg_operand" "d")))] "TARGET_DFP" "dcmpuq %0,%1,%2" - [(set_attr "type" "fpcompare")]) + [(set_attr "type" "dfp")]) (define_insn "floatdidd2" [(set (match_operand:DD 0 "gpc_reg_operand" "=d") (float:DD (match_operand:DI 1 "gpc_reg_operand" "d")))] "TARGET_DFP && TARGET_POPCNTD" "dcffix %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "floatditd2" [(set 
(match_operand:TD 0 "gpc_reg_operand" "=d") (float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dcffixq %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; Convert a decimal64 to a decimal64 whose value is an integer. ;; This is the first stage of converting it to an integer type. @@ -277,7 +277,7 @@ (fix:DD (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "drintn. 0,%0,%1,1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; Convert a decimal64 whose value is an integer to an actual integer. ;; This is the second stage of converting decimal float to integer type. @@ -287,7 +287,7 @@ (fix:DI (match_operand:DD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dctfix %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; Convert a decimal128 to a decimal128 whose value is an integer. ;; This is the first stage of converting it to an integer type. @@ -297,7 +297,7 @@ (fix:TD (match_operand:TD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "drintnq. 0,%0,%1,1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; Convert a decimal128 whose value is an integer to an actual integer. ;; This is the second stage of converting decimal float to integer type. @@ -307,7 +307,7 @@ (fix:DI (match_operand:TD 1 "gpc_reg_operand" "d")))] "TARGET_DFP" "dctfixq %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) ;; Decimal builtin support @@ -318,8 +318,11 @@ UNSPEC_DXEX UNSPEC_DIEX UNSPEC_DSCLI + UNSPEC_DTSTSFI UNSPEC_DSCRI]) +(define_code_iterator DFP_TEST [eq lt gt unordered]) + (define_mode_iterator D64_D128 [DD TD]) (define_mode_attr dfp_suffix [(DD "") @@ -332,7 +335,7 @@ UNSPEC_DDEDPD))] "TARGET_DFP" "ddedpd %1,%0,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "dfp_denbcd_" [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") @@ -341,7 +344,7 @@ UNSPEC_DENBCD))] "TARGET_DFP" "denbcd %1,%0,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "dfp_dxex_" [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") @@ -349,7 +352,7 @@ UNSPEC_DXEX))] "TARGET_DFP" "dxex %0,%1" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "dfp_diex_" [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") @@ -358,6 +361,42 @@ UNSPEC_DXEX))] "TARGET_DFP" "diex %0,%1,%2" + [(set_attr "type" "dfp")]) + +(define_expand "dfptstsfi__" + [(set (match_dup 3) + (compare:CCFP + (unspec:D64_D128 + [(match_operand:SI 1 "const_int_operand" "n") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DTSTSFI) + (match_dup 4))) + (set (match_operand:SI 0 "register_operand" "") + (DFP_TEST:SI (match_dup 3) + (const_int 0))) + ] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCFPmode); + operands[4] = const0_rtx; +}) + +(define_insn "*dfp_sgnfcnc_" + [(set (match_operand:CCFP 0 "" "=y") + (compare:CCFP + (unspec:D64_D128 [(match_operand:SI 1 "const_int_operand" "n") + (match_operand:D64_D128 2 "gpc_reg_operand" "d")] + UNSPEC_DTSTSFI) + (match_operand:SI 3 "zero_constant" "j")))] + "TARGET_P9_MISC" +{ + /* If immediate operand is greater than 63, it will behave as if + the value had been 63. The code generator does not support + immediate operand values greater than 63. 
*/ + if (!(IN_RANGE (INTVAL (operands[1]), 0, 63))) + operands[1] = GEN_INT (63); + return "dtstsfi %0,%1,%2"; +} [(set_attr "type" "fp")]) (define_insn "dfp_dscli_" @@ -367,7 +406,7 @@ UNSPEC_DSCLI))] "TARGET_DFP" "dscli %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) (define_insn "dfp_dscri_" [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d") @@ -376,4 +415,4 @@ UNSPEC_DSCRI))] "TARGET_DFP" "dscri %0,%1,%2" - [(set_attr "type" "fp")]) + [(set_attr "type" "dfp")]) diff --git a/gcc/config/rs6000/e300c2c3.md b/gcc/config/rs6000/e300c2c3.md index 5865e95e2d..e48979979a 100644 --- a/gcc/config/rs6000/e300c2c3.md +++ b/gcc/config/rs6000/e300c2c3.md @@ -150,7 +150,7 @@ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire") (define_insn_reservation "ppce300c3_fp" 3 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "ppce300c3")) "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire") diff --git a/gcc/config/rs6000/e6500.md b/gcc/config/rs6000/e6500.md index 428222d14b..e094192d61 100644 --- a/gcc/config/rs6000/e6500.md +++ b/gcc/config/rs6000/e6500.md @@ -205,7 +205,7 @@ ;; VSFX. (define_insn_reservation "e6500_vecsimple" 1 - (and (eq_attr "type" "vecsimple,veccmp") + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") (eq_attr "cpu" "ppce6500")) "e6500_decode,e6500_vec") diff --git a/gcc/config/rs6000/htm.md b/gcc/config/rs6000/htm.md index 0d0823824a..c0203a9c0c 100644 --- a/gcc/config/rs6000/htm.md +++ b/gcc/config/rs6000/htm.md @@ -72,7 +72,7 @@ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabort. %0" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "tabortc" @@ -98,7 +98,7 @@ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortc. %0,%1,%2" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "tabortci" @@ -124,7 +124,7 @@ (set (match_operand:BLK 4) (unspec:BLK [(match_dup 4)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortci. %0,%1,%2" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "tbegin" @@ -208,7 +208,7 @@ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "trechkpt." - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "treclaim" @@ -230,7 +230,7 @@ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "treclaim. %0" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "tsr" @@ -252,7 +252,7 @@ (set (match_operand:BLK 2) (unspec:BLK [(match_dup 2)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tsr. %0" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_expand "ttest" @@ -272,7 +272,7 @@ (set (match_operand:BLK 1) (unspec:BLK [(match_dup 1)] UNSPEC_HTM_FENCE))] "TARGET_HTM" "tabortwci. 
0,1,0" - [(set_attr "type" "htm") + [(set_attr "type" "htmsimple") (set_attr "length" "4")]) (define_insn "htm_mfspr_" diff --git a/gcc/config/rs6000/mpc.md b/gcc/config/rs6000/mpc.md index 010dc9444e..42cb11a598 100644 --- a/gcc/config/rs6000/mpc.md +++ b/gcc/config/rs6000/mpc.md @@ -81,7 +81,7 @@ "fpu_mpc,bpu_mpc") (define_insn_reservation "mpccore-fp" 4 - (and (eq_attr "type" "fp") + (and (eq_attr "type" "fp,fpsimple") (eq_attr "cpu" "mpccore")) "fpu_mpc*2") diff --git a/gcc/config/rs6000/power4.md b/gcc/config/rs6000/power4.md index 7b0ccbedaa..84ac439fe9 100644 --- a/gcc/config/rs6000/power4.md +++ b/gcc/config/rs6000/power4.md @@ -381,7 +381,7 @@ ; Basic FP latency is 6 cycles (define_insn_reservation "power4-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "power4")) "fpq_power4") @@ -410,7 +410,7 @@ ; VMX (define_insn_reservation "power4-vecsimple" 2 - (and (eq_attr "type" "vecsimple") + (and (eq_attr "type" "vecsimple,veclogical,vecmove") (eq_attr "cpu" "power4")) "vq_power4") @@ -421,7 +421,7 @@ ; vecfp compare (define_insn_reservation "power4-veccmp" 8 - (and (eq_attr "type" "veccmp") + (and (eq_attr "type" "veccmp,veccmpfx") (eq_attr "cpu" "power4")) "vq_power4") diff --git a/gcc/config/rs6000/power5.md b/gcc/config/rs6000/power5.md index 2d7c15e59c..b00d5ead14 100644 --- a/gcc/config/rs6000/power5.md +++ b/gcc/config/rs6000/power5.md @@ -322,7 +322,7 @@ ; Basic FP latency is 6 cycles (define_insn_reservation "power5-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul") (eq_attr "cpu" "power5")) "fpq_power5") diff --git a/gcc/config/rs6000/power6.md b/gcc/config/rs6000/power6.md index 15d31eb81a..a94052417e 100644 --- a/gcc/config/rs6000/power6.md +++ b/gcc/config/rs6000/power6.md @@ -500,7 +500,7 @@ (define_bypass 9 "power6-mtcr" "power6-branch") (define_insn_reservation "power6-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") (eq_attr "cpu" "power6")) "FPU_power6") @@ -556,7 +556,7 @@ "LSF_power6") (define_insn_reservation "power6-vecsimple" 3 - (and (eq_attr "type" "vecsimple") + (and (eq_attr "type" "vecsimple,veclogical,vecmove") (eq_attr "cpu" "power6")) "FPU_power6") @@ -568,7 +568,7 @@ (define_bypass 4 "power6-vecsimple" "power6-vecstore" ) (define_insn_reservation "power6-veccmp" 1 - (and (eq_attr "type" "veccmp") + (and (eq_attr "type" "veccmp,veccmpfx") (eq_attr "cpu" "power6")) "FPU_power6") diff --git a/gcc/config/rs6000/power7.md b/gcc/config/rs6000/power7.md index 9c6326dd26..91ebbf97f9 100644 --- a/gcc/config/rs6000/power7.md +++ b/gcc/config/rs6000/power7.md @@ -292,7 +292,7 @@ ; VS Unit (includes FP/VSX/VMX/DFP) (define_insn_reservation "power7-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") (eq_attr "cpu" "power7")) "DU_power7,VSU_power7") @@ -324,7 +324,7 @@ "DU_power7,VSU_power7") (define_insn_reservation "power7-vecsimple" 2 - (and (eq_attr "type" "vecsimple,veccmp") + (and (eq_attr "type" "vecsimple,veclogical,vecmove,veccmp,veccmpfx") (eq_attr "cpu" "power7")) "DU_power7,vsu1_power7") diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md index 6b6f0ffb8d..4bb323ff43 100644 --- a/gcc/config/rs6000/power8.md +++ b/gcc/config/rs6000/power8.md @@ -317,7 +317,7 @@ ; VS Unit (includes FP/VSX/VMX/DFP/Crypto) (define_insn_reservation "power8-fp" 6 - (and (eq_attr "type" "fp,dmul") + (and (eq_attr "type" "fp,fpsimple,dmul,dfp") (eq_attr "cpu" "power8")) "DU_any_power8,VSU_power8") @@ -350,7 
+350,8 @@ "DU_any_power8,VSU_power8") (define_insn_reservation "power8-vecsimple" 2 - (and (eq_attr "type" "vecperm,vecsimple,veccmp") + (and (eq_attr "type" "vecperm,vecsimple,veclogical,vecmove,veccmp, + veccmpfx") (eq_attr "cpu" "power8")) "DU_any_power8,VSU_power8") diff --git a/gcc/config/rs6000/power9.md b/gcc/config/rs6000/power9.md new file mode 100644 index 0000000000..015b5ba58b --- /dev/null +++ b/gcc/config/rs6000/power9.md @@ -0,0 +1,477 @@ +;; Scheduling description for IBM POWER9 processor. +;; Copyright (C) 2016 Free Software Foundation, Inc. +;; +;; Contributed by Pat Haugen (pthaugen@us.ibm.com). + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "power9dsp,power9lsu,power9vsu,power9misc") + +(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu") +(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu") +; Two vector permute units, part of vsu +(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu") +; Two fixed point divide units, not pipelined +(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc") +(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc") + +(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9, + x2_power9,x3_power9,xb0_power9,xb1_power9, + br0_power9,br1_power9" "power9dsp") + + +; Dispatch port reservations +; +; Power9 can dispatch a maximum of 6 iops per cycle with the following +; general restrictions (other restrictions also apply): +; 1) At most 2 iops per execution slice +; 2) At most 2 iops to the branch unit +; Note that insn position in a dispatch group of 6 insns does not infer which +; execution slice the insn is routed to. The units are used to infer the +; conflicts that exist (i.e. an 'even' requirement will preclude dispatch +; with 2 insns with 'superslice' requirement). + +; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but +; are listed as separate units to allow those insns that preclude its use to +; still be scheduled two to a superslice while reserving the 3rd slot. The +; same applies for xb0/xb1. 
+(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9") +(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9") + +; Any execution slice dispatch +(define_reservation "DU_any_power9" + "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9| + DU_xb_power9") + +; Even slice, actually takes even/odd slots +(define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9") + +; Slice plus 3rd slot +(define_reservation "DU_slice_3_power9" + "x0_power9+xa0_power9|x1_power9+xa1_power9| + x2_power9+xb0_power9|x3_power9+xb1_power9") + +; Superslice +(define_reservation "DU_super_power9" + "x0_power9+x1_power9|x2_power9+x3_power9") + +; 2-way cracked +(define_reservation "DU_C2_power9" "x0_power9+x1_power9| + x1_power9+DU_xa_power9| + x1_power9+x2_power9| + DU_xa_power9+x2_power9| + x2_power9+x3_power9| + x3_power9+DU_xb_power9") + +; 2-way cracked plus 3rd slot +(define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9| + x1_power9+x2_power9+xa0_power9| + x1_power9+x2_power9+xb0_power9| + x2_power9+x3_power9+xb0_power9") + +; 3-way cracked (consumes whole decode/dispatch cycle) +(define_reservation "DU_C3_power9" + "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+ + x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9") + +; Branch ports +(define_reservation "DU_branch_power9" "br0_power9|br1_power9") + + +; Execution unit reservations +(define_reservation "LSU_power9" + "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9") + +(define_reservation "LSU_pair_power9" + "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9| + lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9") + +(define_reservation "VSU_power9" + "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9") + +(define_reservation "VSU_super_power9" + "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9") + +(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9") + + +; LS Unit +(define_insn_reservation "power9-load" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + +(define_insn_reservation "power9-load-update" 4 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "no") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-load-ext" 6 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9") + +(define_insn_reservation "power9-load-ext-update" 6 + (and (eq_attr "type" "load") + (eq_attr "sign_extend" "yes") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-fpload-double" 4 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +(define_insn_reservation "power9-fpload-update-double" 4 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +; SFmode loads are cracked and have additional 2 cycles over DFmode +(define_insn_reservation "power9-fpload-single" 6 + (and (eq_attr "type" "fpload") + (eq_attr "update" "no") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9") + +(define_insn_reservation "power9-fpload-update-single" 6 + (and (eq_attr "type" "fpload") + (eq_attr "update" "yes") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + 
"DU_C3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-vecload" 5 + (and (eq_attr "type" "vecload") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_pair_power9") + +; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store +(define_insn_reservation "power9-store" 0 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +(define_insn_reservation "power9-store-indexed" 0 + (and (eq_attr "type" "store") + (eq_attr "update" "no") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-store-update" 2 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "no") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-store-update-indexed" 2 + (and (eq_attr "type" "store") + (eq_attr "update" "yes") + (eq_attr "indexed" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-fpstore" 0 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "no") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,LSU_power9") + +; Update forms have 2 cycle latency for updated addr reg +(define_insn_reservation "power9-fpstore-update" 2 + (and (eq_attr "type" "fpstore") + (eq_attr "update" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-vecstore" 0 + (and (eq_attr "type" "vecstore") + (eq_attr "cpu" "power9")) + "DU_super_power9,LSU_pair_power9") + +(define_insn_reservation "power9-larx" 4 + (and (eq_attr "type" "load_l") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + +(define_insn_reservation "power9-stcx" 2 + (and (eq_attr "type" "store_c") + (eq_attr "cpu" "power9")) + "DU_C2_3_power9,LSU_power9+VSU_power9") + +(define_insn_reservation "power9-sync" 4 + (and (eq_attr "type" "sync,isync") + (eq_attr "cpu" "power9")) + "DU_any_power9,LSU_power9") + + +; VSU Execution Unit + +; Fixed point ops + +; Most ALU insns are simple 2 cycle, including record form +(define_insn_reservation "power9-alu" 2 + (and (ior (eq_attr "type" "add,cmp,exts,integer,logical,isel") + (and (eq_attr "type" "insert,shift") + (eq_attr "dot" "no"))) + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Record form rotate/shift are cracked +(define_insn_reservation "power9-cracked-alu" 2 + (and (eq_attr "type" "insert,shift") + (eq_attr "dot" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") +; 4 cycle CR latency +(define_bypass 4 "power9-cracked-alu" + "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch") + +(define_insn_reservation "power9-alu2" 3 + (and (eq_attr "type" "cntlz,popcnt,trap") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Treat 'two' and 'three' types as 2 or 3 way cracked +(define_insn_reservation "power9-two" 4 + (and (eq_attr "type" "two") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") + +(define_insn_reservation "power9-three" 6 + (and (eq_attr "type" "three") + (eq_attr "cpu" "power9")) + "DU_C3_power9,VSU_power9") + +(define_insn_reservation "power9-mul" 4 + (and (eq_attr "type" "mul") + (eq_attr "dot" "no") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mul-compare" 4 + (and (eq_attr "type" "mul") + 
(eq_attr "dot" "yes") + (eq_attr "cpu" "power9")) + "DU_C2_power9,VSU_power9") +; 6 cycle CR latency +(define_bypass 6 "power9-mul-compare" + "power9-crlogical,power9-mfcr,power9-mfcrf,power9-branch") + +; Fixed point divides reserve the divide units for a minimum of 8 cycles +(define_insn_reservation "power9-idiv" 16 + (and (eq_attr "type" "div") + (eq_attr "size" "32") + (eq_attr "cpu" "power9")) + "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") + +(define_insn_reservation "power9-ldiv" 24 + (and (eq_attr "type" "div") + (eq_attr "size" "64") + (eq_attr "cpu" "power9")) + "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") + +(define_insn_reservation "power9-crlogical" 2 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mfcrf" 2 + (and (eq_attr "type" "mfcrf") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +(define_insn_reservation "power9-mfcr" 6 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "power9")) + "DU_C3_power9,VSU_power9") + +; Should differentiate between 1 cr field and > 1 since target of > 1 cr +; is cracked +(define_insn_reservation "power9-mtcr" 2 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Move to LR/CTR are executed in VSU +(define_insn_reservation "power9-mtjmpr" 5 + (and (eq_attr "type" "mtjmpr") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + +; Floating point/Vector ops +(define_insn_reservation "power9-fpsimple" 2 + (and (eq_attr "type" "fpsimple") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-fp" 7 + (and (eq_attr "type" "fp,dmul") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-fpcompare" 3 + (and (eq_attr "type" "fpcompare") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other +; divide insns, but for the most part do not block pipelined ops. 
+(define_insn_reservation "power9-sdiv" 22 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-ddiv" 33 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-sqrt" 26 + (and (eq_attr "type" "ssqrt") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-dsqrt" 36 + (and (eq_attr "type" "dsqrt") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-vec-2cyc" 2 + (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-veccmp" 3 + (and (eq_attr "type" "veccmp") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecsimple" 3 + (and (eq_attr "type" "vecsimple") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecnormal" 7 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "size" "!128") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +; Quad-precision FP ops, execute in DFU +(define_insn_reservation "power9-qp" 12 + (and (eq_attr "type" "vecfloat,vecdouble") + (eq_attr "size" "128") + (eq_attr "cpu" "power9")) + "DU_super_power9,dfu_power9") + +(define_insn_reservation "power9-vecperm" 3 + (and (eq_attr "type" "vecperm") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_PRM_power9") + +(define_insn_reservation "power9-veccomplex" 7 + (and (eq_attr "type" "veccomplex") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecfdiv" 28 + (and (eq_attr "type" "vecfdiv") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-vecdiv" 32 + (and (eq_attr "type" "vecdiv") + (eq_attr "size" "!128") + (eq_attr "cpu" "power9")) + "DU_super_power9,VSU_super_power9") + +(define_insn_reservation "power9-qpdiv" 56 + (and (eq_attr "type" "vecdiv") + (eq_attr "size" "128") + (eq_attr "cpu" "power9")) + "DU_super_power9,dfu_power9") + +(define_insn_reservation "power9-mffgpr" 2 + (and (eq_attr "type" "mffgpr") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + +(define_insn_reservation "power9-mftgpr" 2 + (and (eq_attr "type" "mftgpr") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + + +; Branch Unit +; Move from LR/CTR are executed in BRU but consume a writeback port from an +; execution slice. 
+(define_insn_reservation "power9-mfjmpr" 6 + (and (eq_attr "type" "mfjmpr") + (eq_attr "cpu" "power9")) + "DU_branch_power9,bru_power9+VSU_power9") + +; Branch is 2 cycles +(define_insn_reservation "power9-branch" 2 + (and (eq_attr "type" "jmpreg,branch") + (eq_attr "cpu" "power9")) + "DU_branch_power9,bru_power9") + + +; Crypto Unit +(define_insn_reservation "power9-crypto" 6 + (and (eq_attr "type" "crypto") + (eq_attr "cpu" "power9")) + "DU_super_power9,cryptu_power9") + + +; HTM Unit +(define_insn_reservation "power9-htm" 4 + (and (eq_attr "type" "htm") + (eq_attr "cpu" "power9")) + "DU_C2_power9,LSU_power9") + +(define_insn_reservation "power9-htm-simple" 2 + (and (eq_attr "type" "htmsimple") + (eq_attr "cpu" "power9")) + "DU_any_power9,VSU_power9") + + +; DFP Unit +(define_insn_reservation "power9-dfp" 12 + (and (eq_attr "type" "dfp") + (eq_attr "cpu" "power9")) + "DU_even_power9,dfu_power9") + diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 71fac765e2..41694a51f1 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -572,6 +572,38 @@ } }) +;; Return 1 if the operand is a CONST_VECTOR or VEC_DUPLICATE of a constant +;; that can loaded with a XXSPLTIB instruction and then a VUPKHSB, VECSB2W or +;; VECSB2D instruction. + +(define_predicate "xxspltib_constant_split" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns > 1; +}) + + +;; Return 1 if the operand is a CONST_VECTOR that can loaded directly with a +;; XXSPLTIB instruction. + +(define_predicate "xxspltib_constant_nosplit" + (match_code "const_vector,vec_duplicate,const_int") +{ + int value = 256; + int num_insns = -1; + + if (!xxspltib_constant_p (op, mode, &num_insns, &value)) + return false; + + return num_insns == 1; +}) + ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" @@ -590,7 +622,14 @@ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) { - if (zero_constant (op, mode)) + int value = 256; + int num_insns = -1; + + if (zero_constant (op, mode) || all_ones_constant (op, mode)) + return true; + + if (TARGET_P9_VECTOR + && xxspltib_constant_p (op, mode, &num_insns, &value)) return true; return easy_altivec_constant (op, mode); @@ -669,6 +708,11 @@ (and (match_code "const_int,const_double,const_wide_int,const_vector") (match_test "op == CONST0_RTX (mode)"))) +;; Return 1 if operand is constant -1 (scalars and vectors). +(define_predicate "all_ones_constant" + (and (match_code "const_int,const_double,const_wide_int,const_vector") + (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)"))) + ;; Return 1 if operand is 0.0. 
(define_predicate "zero_fp_constant" (and (match_code "const_double") @@ -698,48 +742,25 @@ (define_predicate "quad_memory_operand" (match_code "mem") { - rtx addr, op0, op1; - int ret; - if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI) - ret = 0; - - else if (!memory_operand (op, mode)) - ret = 0; - - else if (GET_MODE_SIZE (GET_MODE (op)) != 16) - ret = 0; - - else if (MEM_ALIGN (op) < 128) - ret = 0; - - else - { - addr = XEXP (op, 0); - if (int_reg_operand (addr, Pmode)) - ret = 1; + return false; - else if (GET_CODE (addr) != PLUS) - ret = 0; + if (GET_MODE_SIZE (mode) != 16 || !MEM_P (op) || MEM_ALIGN (op) < 128) + return false; - else - { - op0 = XEXP (addr, 0); - op1 = XEXP (addr, 1); - ret = (int_reg_operand (op0, Pmode) - && GET_CODE (op1) == CONST_INT - && IN_RANGE (INTVAL (op1), -32768, 32767) - && (INTVAL (op1) & 15) == 0); - } - } + return quad_address_p (XEXP (op, 0), mode, false); +}) - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false"); - debug_rtx (op); - } +;; Return 1 if the operand is suitable for load/store to vector registers with +;; d-form addressing (register+offset), which was added in ISA 3.0. +;; Unlike quad_memory_operand, we do not have to check for alignment. +(define_predicate "vsx_quad_dform_memory_operand" + (match_code "mem") +{ + if (!TARGET_P9_DFORM_VECTOR || !MEM_P (op) || GET_MODE_SIZE (mode) != 16) + return false; - return ret; + return quad_address_p (XEXP (op, 0), mode, false); }) ;; Return 1 if the operand is an indexed or indirect memory operand. @@ -1054,6 +1075,10 @@ mode = V2DFmode; else if (mode == DImode) mode = V2DImode; + else if (mode == SImode && TARGET_P9_VECTOR) + mode = V4SImode; + else if (mode == SFmode && TARGET_P9_VECTOR) + mode = V4SFmode; else gcc_unreachable (); return memory_address_addr_space_p (mode, XEXP (op, 0), @@ -1091,10 +1116,6 @@ (define_special_predicate "equality_operator" (match_code "eq,ne")) -;; Return true if operand is MIN or MAX operator. -(define_predicate "min_max_operator" - (match_code "smin,smax,umin,umax")) - ;; Return 1 if OP is a comparison operation that is valid for a branch ;; instruction. We check the opcode against the mode of the CC value. ;; validate_condition_mode is an assertion. @@ -1137,6 +1158,11 @@ (and (match_operand 0 "branch_comparison_operator") (match_code "ne,le,ge,leu,geu,ordered"))) +;; Return 1 if OP is a comparison operator suitable for vector/scalar +;; comparisons that generate a -1/0 mask. +(define_predicate "fpmask_comparison_operator" + (match_code "eq,gt,ge")) + ;; Return 1 if OP is a comparison operation that is valid for a branch ;; insn, which is true if the corresponding bit in the CC register is set. (define_predicate "branch_positive_comparison_operator" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 5b82b00449..a33faa6e5b 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -24,6 +24,7 @@ . */ /* Before including this file, some macros must be defined: + RS6000_BUILTIN_0 -- 0 arg builtins RS6000_BUILTIN_1 -- 1 arg builtins RS6000_BUILTIN_2 -- 2 arg builtins RS6000_BUILTIN_3 -- 3 arg builtins @@ -43,6 +44,10 @@ ATTR builtin attribute information. ICODE Insn code of the function that implents the builtin. */ +#ifndef RS6000_BUILTIN_0 + #error "RS6000_BUILTIN_0 is not defined." +#endif + #ifndef RS6000_BUILTIN_1 #error "RS6000_BUILTIN_1 is not defined." 
#endif @@ -637,6 +642,91 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* Miscellaneous builtins for instructions added in ISA 3.0. These + instructions don't require either the DFP or VSX options, just the basic + ISA 3.0 enablement since they operate on general purpose registers. */ +#define BU_P9_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Miscellaneous builtins for instructions added in ISA 3.0. These + instructions don't require either the DFP or VSX options, just the basic + ISA 3.0 enablement since they operate on general purpose registers, + and they require 64-bit addressing. */ +#define BU_P9_64BIT_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC \ + | RS6000_BTM_64BIT, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Miscellaneous builtins for decimal floating point instructions + added in ISA 3.0. These instructions don't require the VSX + options, just the basic ISA 3.0 enablement since they operate on + general purpose registers. */ +#define BU_P9_DFP_MISC_0(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_0 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_DFP_MISC_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9_DFP_MISC_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* Decimal floating point overloaded functions added in ISA 3.0 */ +#define BU_P9_DFP_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_DFP_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9_DFP_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P9_BUILTIN_DFP_ ## ENUM, /* ENUM */ \ + "__builtin_dfp_" NAME, /* NAME */ \ + RS6000_BTM_P9_MISC, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* 128-bit long double floating point builtins. 
*/ #define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -647,8 +737,94 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +/* IEEE 128-bit floating-point builtins. */ +#define BU_FLOAT128_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_FLOAT128_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_" NAME, /* NAME */ \ + RS6000_BTM_FLOAT128, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* ISA 3.0 (power9) vector convenience macros. */ +/* For the instructions that are encoded as altivec instructions use + __builtin_altivec_ as the builtin name. */ +#define BU_P9V_AV_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_2(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_AV_P(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_P (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_PREDICATE), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +/* For the instructions encoded as VSX instructions use __builtin_vsx as the + builtin name. */ +#define BU_P9V_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_OVERLOAD_1(ENUM, NAME) \ + RS6000_BUILTIN_1 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9V_OVERLOAD_2(ENUM, NAME) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_nothing) /* ICODE */ + +#define BU_P9V_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P9V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P9_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ #endif + /* Insure 0 is not a legitimate index. 
*/ BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC) @@ -1391,13 +1567,25 @@ BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) BU_VSX_X (STXSDX, "stxsdx", MEM) -BU_VSX_X (STXVD2X_V1TI, "stxsdx_v1ti", MEM) -BU_VSX_X (STXVD2X_V2DF, "stxsdx_v2df", MEM) -BU_VSX_X (STXVD2X_V2DI, "stxsdx_v2di", MEM) -BU_VSX_X (STXVW4X_V4SF, "stxsdx_v4sf", MEM) -BU_VSX_X (STXVW4X_V4SI, "stxsdx_v4si", MEM) -BU_VSX_X (STXVW4X_V8HI, "stxsdx_v8hi", MEM) -BU_VSX_X (STXVW4X_V16QI, "stxsdx_v16qi", MEM) +BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM) +BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM) +BU_VSX_X (STXVD2X_V2DI, "stxvd2x_v2di", MEM) +BU_VSX_X (STXVW4X_V4SF, "stxvw4x_v4sf", MEM) +BU_VSX_X (STXVW4X_V4SI, "stxvw4x_v4si", MEM) +BU_VSX_X (STXVW4X_V8HI, "stxvw4x_v8hi", MEM) +BU_VSX_X (STXVW4X_V16QI, "stxvw4x_v16qi", MEM) +BU_VSX_X (LD_ELEMREV_V2DF, "ld_elemrev_v2df", MEM) +BU_VSX_X (LD_ELEMREV_V2DI, "ld_elemrev_v2di", MEM) +BU_VSX_X (LD_ELEMREV_V4SF, "ld_elemrev_v4sf", MEM) +BU_VSX_X (LD_ELEMREV_V4SI, "ld_elemrev_v4si", MEM) +BU_VSX_X (LD_ELEMREV_V8HI, "ld_elemrev_v8hi", MEM) +BU_VSX_X (LD_ELEMREV_V16QI, "ld_elemrev_v16qi", MEM) +BU_VSX_X (ST_ELEMREV_V2DF, "st_elemrev_v2df", MEM) +BU_VSX_X (ST_ELEMREV_V2DI, "st_elemrev_v2di", MEM) +BU_VSX_X (ST_ELEMREV_V4SF, "st_elemrev_v4sf", MEM) +BU_VSX_X (ST_ELEMREV_V4SI, "st_elemrev_v4si", MEM) +BU_VSX_X (ST_ELEMREV_V8HI, "st_elemrev_v8hi", MEM) +BU_VSX_X (ST_ELEMREV_V16QI, "st_elemrev_v16qi", MEM) BU_VSX_X (XSABSDP, "xsabsdp", CONST) BU_VSX_X (XSADDDP, "xsadddp", FP) BU_VSX_X (XSCMPODP, "xscmpodp", FP) @@ -1455,6 +1643,8 @@ BU_VSX_OVERLOAD_1 (DOUBLE, "double") /* VSX builtins that are handled as special cases. */ BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") +BU_VSX_OVERLOAD_X (XL, "xl") +BU_VSX_OVERLOAD_X (XST, "xst") /* 1 argument VSX instructions added in ISA 2.07. */ BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) @@ -1589,6 +1779,25 @@ BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm") BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq") BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") +/* ISA 3.0 vector overloaded 2-argument functions. */ +BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) +BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv) + +/* ISA 3.0 vector overloaded 2-argument functions. */ +BU_P9V_OVERLOAD_2 (VSLV, "vslv") +BU_P9V_OVERLOAD_2 (VSRV, "vsrv") + +/* 2 argument vector functions added in ISA 3.0 (power9). */ +BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3) +BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3) +BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3) + +/* ISA 3.0 vector overloaded 2 argument functions. */ +BU_P9V_OVERLOAD_2 (VADU, "vadu") +BU_P9V_OVERLOAD_2 (VADUB, "vadub") +BU_P9V_OVERLOAD_2 (VADUH, "vaduh") +BU_P9V_OVERLOAD_2 (VADUW, "vaduw") + /* 2 argument extended divide functions added in ISA 2.06. */ BU_P7_MISC_2 (DIVWE, "divwe", CONST, dive_si) @@ -1639,12 +1848,72 @@ BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered) BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd) BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd) +/* 0 argument general-purpose register functions added in ISA 3.0 (power9). 
*/ +BU_P9_MISC_0 (DARN_32, "darn_32", MISC, darn_32) +BU_P9_64BIT_MISC_0 (DARN_RAW, "darn_raw", MISC, darn_raw) +BU_P9_64BIT_MISC_0 (DARN, "darn", MISC, darn) + BU_LDBL128_2 (PACK_TF, "pack_longdouble", CONST, packtf) BU_LDBL128_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti) BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti) +/* 2 argument DFP (Decimal Floating Point) functions added in ISA 3.0. */ +BU_P9_DFP_MISC_2 (TSTSFI_LT_DD, "dtstsfi_lt_dd", CONST, dfptstsfi_lt_dd) +BU_P9_DFP_MISC_2 (TSTSFI_LT_TD, "dtstsfi_lt_td", CONST, dfptstsfi_lt_td) + +BU_P9_DFP_MISC_2 (TSTSFI_EQ_DD, "dtstsfi_eq_dd", CONST, dfptstsfi_eq_dd) +BU_P9_DFP_MISC_2 (TSTSFI_EQ_TD, "dtstsfi_eq_td", CONST, dfptstsfi_eq_td) + +BU_P9_DFP_MISC_2 (TSTSFI_GT_DD, "dtstsfi_gt_dd", CONST, dfptstsfi_gt_dd) +BU_P9_DFP_MISC_2 (TSTSFI_GT_TD, "dtstsfi_gt_td", CONST, dfptstsfi_gt_td) + +BU_P9_DFP_MISC_2 (TSTSFI_OV_DD, "dtstsfi_ov_dd", CONST, dfptstsfi_unordered_dd) +BU_P9_DFP_MISC_2 (TSTSFI_OV_TD, "dtstsfi_ov_td", CONST, dfptstsfi_unordered_td) + +/* 2 argument overloaded DFP functions added in ISA 3.0. */ +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT, "dtstsfi_lt") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT_DD, "dtstsfi_lt_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_LT_TD, "dtstsfi_lt_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ, "dtstsfi_eq") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ_DD, "dtstsfi_eq_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_EQ_TD, "dtstsfi_eq_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT, "dtstsfi_gt") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT_DD, "dtstsfi_gt_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_GT_TD, "dtstsfi_gt_td") + +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV, "dtstsfi_ov") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV_DD, "dtstsfi_ov_dd") +BU_P9_DFP_OVERLOAD_2 (TSTSFI_OV_TD, "dtstsfi_ov_td") + +/* 1 argument vector functions added in ISA 3.0 (power9). */ +BU_P9V_AV_1 (VCTZB, "vctzb", CONST, ctzv16qi2) +BU_P9V_AV_1 (VCTZH, "vctzh", CONST, ctzv8hi2) +BU_P9V_AV_1 (VCTZW, "vctzw", CONST, ctzv4si2) +BU_P9V_AV_1 (VCTZD, "vctzd", CONST, ctzv2di2) +BU_P9V_AV_1 (VPRTYBD, "vprtybd", CONST, parityv2di2) +BU_P9V_AV_1 (VPRTYBQ, "vprtybq", CONST, parityv1ti2) +BU_P9V_AV_1 (VPRTYBW, "vprtybw", CONST, parityv4si2) + +/* ISA 3.0 vector overloaded 1 argument functions. */ +BU_P9V_OVERLOAD_1 (VCTZ, "vctz") +BU_P9V_OVERLOAD_1 (VCTZB, "vctzb") +BU_P9V_OVERLOAD_1 (VCTZH, "vctzh") +BU_P9V_OVERLOAD_1 (VCTZW, "vctzw") +BU_P9V_OVERLOAD_1 (VCTZD, "vctzd") +BU_P9V_OVERLOAD_1 (VPRTYB, "vprtyb") +BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd") +BU_P9V_OVERLOAD_1 (VPRTYBQ, "vprtybq") +BU_P9V_OVERLOAD_1 (VPRTYBW, "vprtybw") + +/* 1 argument IEEE 128-bit floating-point functions. */ +BU_FLOAT128_1 (FABSQ, "fabsq", CONST, abskf2) + +/* 2 argument IEEE 128-bit floating-point functions. */ +BU_FLOAT128_2 (COPYSIGNQ, "copysignq", CONST, copysignkf3) /* 1 argument crypto functions. */ BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox) @@ -2022,6 +2291,18 @@ BU_SPECIAL_X (RS6000_BUILTIN_CPU_IS, "__builtin_cpu_is", BU_SPECIAL_X (RS6000_BUILTIN_CPU_SUPPORTS, "__builtin_cpu_supports", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_NANQ, "__builtin_nanq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_NANSQ, "__builtin_nansq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_INFQ, "__builtin_infq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + +BU_SPECIAL_X (RS6000_BUILTIN_HUGE_VALQ, "__builtin_huge_valq", + RS6000_BTM_FLOAT128, RS6000_BTC_CONST) + /* Darwin CfString builtin. 
*/ BU_SPECIAL_X (RS6000_BUILTIN_CFSTRING, "__builtin_cfstring", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index ceb80b216b..9eb6d545c5 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -2726,6 +2726,49 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -3475,6 +3518,55 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL, RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, 
+ { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_long_long }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_XST, VSX_BUILTIN_ST_ELEMREV_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE }, { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI, @@ -4123,6 +4215,105 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { P9_BUILTIN_DFP_TSTSFI_LT, MISC_BUILTIN_TSTSFI_LT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_LT, MISC_BUILTIN_TSTSFI_LT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_LT_TD, MISC_BUILTIN_TSTSFI_LT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_LT_DD, MISC_BUILTIN_TSTSFI_LT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_EQ, MISC_BUILTIN_TSTSFI_EQ_TD, + 
RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_EQ, MISC_BUILTIN_TSTSFI_EQ_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_EQ_TD, MISC_BUILTIN_TSTSFI_EQ_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_EQ_DD, MISC_BUILTIN_TSTSFI_EQ_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_GT, MISC_BUILTIN_TSTSFI_GT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_GT, MISC_BUILTIN_TSTSFI_GT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_GT_TD, MISC_BUILTIN_TSTSFI_GT_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_GT_DD, MISC_BUILTIN_TSTSFI_GT_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_OV, MISC_BUILTIN_TSTSFI_OV_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_OV, MISC_BUILTIN_TSTSFI_OV_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9_BUILTIN_DFP_TSTSFI_OV_TD, MISC_BUILTIN_TSTSFI_OV_TD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat128, 0 }, + { P9_BUILTIN_DFP_TSTSFI_OV_DD, MISC_BUILTIN_TSTSFI_OV_DD, + RS6000_BTI_INTSI, RS6000_BTI_UINTSI, RS6000_BTI_dfloat64, 0 }, + + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZ, P9V_BUILTIN_VCTZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZB, P9V_BUILTIN_VCTZB, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZB, P9V_BUILTIN_VCTZB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZH, P9V_BUILTIN_VCTZH, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZH, P9V_BUILTIN_VCTZH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZW, P9V_BUILTIN_VCTZW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZW, P9V_BUILTIN_VCTZW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P9V_BUILTIN_VEC_VCTZD, P9V_BUILTIN_VCTZD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VCTZD, P9V_BUILTIN_VCTZD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P9V_BUILTIN_VEC_VADU, P9V_BUILTIN_VADUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + + { P9V_BUILTIN_VEC_VADUB, P9V_BUILTIN_VADUB, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + 
RS6000_BTI_unsigned_V16QI, 0 }, + + { P9V_BUILTIN_VEC_VADUH, P9V_BUILTIN_VADUH, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + + { P9V_BUILTIN_VEC_VADUW, P9V_BUILTIN_VADUW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, @@ -4252,6 +4443,42 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYB, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBW, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBW, P9V_BUILTIN_VPRTYBW, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBD, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBD, P9V_BUILTIN_VPRTYBD, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, + + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_INTTI, RS6000_BTI_INTTI, 0, 0 }, + { P9V_BUILTIN_VEC_VPRTYBQ, P9V_BUILTIN_VPRTYBQ, + RS6000_BTI_UINTTI, RS6000_BTI_UINTTI, 0, 0 }, + { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM, @@ -4328,6 +4555,13 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_unsigned_V16QI, 0, 0, 0 }, + { P9V_BUILTIN_VEC_VSLV, P9V_BUILTIN_VSLV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P9V_BUILTIN_VEC_VSRV, P9V_BUILTIN_VSRV, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + /* Crypto builtins. */ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 275404a63a..7d97f7f84e 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -60,15 +60,26 @@ | OPTION_MASK_UPPER_REGS_SF) /* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add - P9_DFORM or P9_MINMAX until they are fully debugged. */ + P9_MINMAX until the hardware that supports it is available. Do not add + FLOAT128_HW here until we are ready to make -mfloat128 on by default. 
*/ #define ISA_3_0_MASKS_SERVER (ISA_2_7_MASKS_SERVER \ - | OPTION_MASK_FLOAT128_HW \ | OPTION_MASK_ISEL \ | OPTION_MASK_MODULO \ | OPTION_MASK_P9_FUSION \ - | OPTION_MASK_P9_DFORM \ + | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ + | OPTION_MASK_P9_MISC \ | OPTION_MASK_P9_VECTOR) +/* Support for the IEEE 128-bit floating point hardware requires a lot of the + VSX instructions that are part of ISA 3.0. */ +#define ISA_3_0_MASKS_IEEE (OPTION_MASK_VSX \ + | OPTION_MASK_P8_VECTOR \ + | OPTION_MASK_P9_VECTOR \ + | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_UPPER_REGS_DF \ + | OPTION_MASK_UPPER_REGS_SF) + #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) /* Deal with ports that do not have -mstrict-align. */ @@ -94,6 +105,7 @@ | OPTION_MASK_FPRND \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ + | OPTION_MASK_LRA \ | OPTION_MASK_MFCRF \ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MODULO \ @@ -101,9 +113,11 @@ | OPTION_MASK_NO_UPDATE \ | OPTION_MASK_P8_FUSION \ | OPTION_MASK_P8_VECTOR \ - | OPTION_MASK_P9_DFORM \ + | OPTION_MASK_P9_DFORM_SCALAR \ + | OPTION_MASK_P9_DFORM_VECTOR \ | OPTION_MASK_P9_FUSION \ | OPTION_MASK_P9_MINMAX \ + | OPTION_MASK_P9_MISC \ | OPTION_MASK_P9_VECTOR \ | OPTION_MASK_POPCNTB \ | OPTION_MASK_POPCNTD \ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9a6b1f784..3bb25c0493 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, #endif /* TREE_CODE */ extern bool easy_altivec_constant (rtx, machine_mode); +extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *); extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); @@ -38,6 +39,7 @@ extern int num_insns_constant (rtx, machine_mode); extern int num_insns_constant_wide (HOST_WIDE_INT); extern int small_data_operand (rtx, machine_mode); extern bool mem_operand_gpr (rtx, machine_mode); +extern bool mem_operand_ds_form (rtx, machine_mode); extern bool toc_relative_expr_p (const_rtx, bool); extern bool invalid_e500_subreg (rtx, machine_mode); extern void validate_condition_mode (enum rtx_code, machine_mode); @@ -86,6 +88,7 @@ extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); +extern bool quad_address_p (rtx, machine_mode, bool); extern bool quad_load_store_p (rtx, rtx); extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx); extern void expand_fusion_gpr_load (rtx *); @@ -133,6 +136,7 @@ extern bool rs6000_emit_set_const (rtx, rtx); extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); +extern void rs6000_split_signbit (rtx, rtx); extern void rs6000_expand_atomic_compare_and_swap (rtx op[]); extern void rs6000_expand_atomic_exchange (rtx op[]); extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1d0076c41f..9c24ad516a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -452,6 +452,7 @@ typedef unsigned char addr_mask_type; #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. 
*/ #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ +#define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */ /* Register type masks based on the type, of valid addressing modes. */ struct rs6000_reg_addr { @@ -499,6 +500,16 @@ mode_supports_vmx_dform (machine_mode mode) return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0); } +/* Return true if we have D-form addressing in VSX registers. This addressing + is more limited than normal d-form addressing in that the offset must be + aligned on a 16-byte boundary. */ +static inline bool +mode_supports_vsx_dform_quad (machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET) + != 0); +} + /* Target cpu costs. */ @@ -1093,16 +1104,16 @@ struct processor_costs power9_cost = { COSTS_N_INSNS (3), /* mulsi_const */ COSTS_N_INSNS (3), /* mulsi_const9 */ COSTS_N_INSNS (3), /* muldi */ - COSTS_N_INSNS (19), /* divsi */ - COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (8), /* divsi */ + COSTS_N_INSNS (12), /* divdi */ COSTS_N_INSNS (3), /* fp */ COSTS_N_INSNS (3), /* dmul */ - COSTS_N_INSNS (14), /* sdiv */ - COSTS_N_INSNS (17), /* ddiv */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (18), /* ddiv */ 128, /* cache line size */ 32, /* l1 cache */ - 256, /* l2 cache */ - 12, /* prefetch streams */ + 512, /* l2 cache */ + 8, /* prefetch streams */ COSTS_N_INSNS (3), /* SF->DF convert */ }; @@ -1128,6 +1139,7 @@ struct processor_costs ppca2_cost = { /* Table that classifies rs6000 builtin functions (pure, const, etc.). */ +#undef RS6000_BUILTIN_0 #undef RS6000_BUILTIN_1 #undef RS6000_BUILTIN_2 #undef RS6000_BUILTIN_3 @@ -1140,6 +1152,9 @@ struct processor_costs ppca2_cost = { #undef RS6000_BUILTIN_S #undef RS6000_BUILTIN_X +#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \ + { NAME, ICODE, MASK, ATTR }, + #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \ { NAME, ICODE, MASK, ATTR }, @@ -1185,6 +1200,7 @@ static const struct rs6000_builtin_info_type rs6000_builtin_info[] = #include "rs6000-builtin.def" }; +#undef RS6000_BUILTIN_0 #undef RS6000_BUILTIN_1 #undef RS6000_BUILTIN_2 #undef RS6000_BUILTIN_3 @@ -1312,6 +1328,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type, bool); rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused)); +static tree rs6000_fold_builtin (tree, int, tree *, bool); /* Hash table stuff for keeping track of TOC entries. */ @@ -1586,6 +1603,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL rs6000_builtin_decl +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN rs6000_fold_builtin + #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin @@ -1866,7 +1886,7 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) 128-bit floating point that can go in vector registers, which has VSX memory addressing. */ if (FP_REGNO_P (regno)) - reg_size = (VECTOR_MEM_VSX_P (mode) + reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode) ? UNITS_PER_VSX_WORD : UNITS_PER_FP_WORD); @@ -1898,6 +1918,9 @@ rs6000_hard_regno_mode_ok (int regno, machine_mode mode) { int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + if (COMPLEX_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + /* PTImode can only go in GPRs. 
Quad word memory operations require even/odd register combinations, and use PTImode where we need to deal with quad word memory operations. Don't allow quad words in the argument or frame @@ -2105,7 +2128,9 @@ rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) else if (keep_spaces) *p++ = ' '; - if ((mask & RELOAD_REG_OFFSET) != 0) + if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) + *p++ = 'O'; + else if ((mask & RELOAD_REG_OFFSET) != 0) *p++ = 'o'; else if (keep_spaces) *p++ = ' '; @@ -2642,8 +2667,7 @@ rs6000_debug_reg_global (void) if (TARGET_LINK_STACK) fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); - if (targetm.lra_p ()) - fprintf (stderr, DEBUG_FMT_S, "lra", "true"); + fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false"); if (TARGET_P8_FUSION) { @@ -2699,8 +2723,17 @@ rs6000_setup_reg_addr_masks (void) for (m = 0; m < NUM_MACHINE_MODES; ++m) { - machine_mode m2 = (machine_mode)m; - unsigned short msize = GET_MODE_SIZE (m2); + machine_mode m2 = (machine_mode) m; + bool complex_p = false; + size_t msize; + + if (COMPLEX_MODE_P (m2)) + { + complex_p = true; + m2 = GET_MODE_INNER (m2); + } + + msize = GET_MODE_SIZE (m2); /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and @@ -2722,7 +2755,7 @@ rs6000_setup_reg_addr_masks (void) /* Indicate if the mode takes more than 1 physical register. If it takes a single register, indicate it can do REG+REG addressing. */ - if (nregs > 1 || m == BLKmode) + if (nregs > 1 || m == BLKmode || complex_p) addr_mask |= RELOAD_REG_MULTIPLE; else addr_mask |= RELOAD_REG_INDEXED; @@ -2738,7 +2771,7 @@ rs6000_setup_reg_addr_masks (void) && msize <= 8 && !VECTOR_MODE_P (m2) && !FLOAT128_VECTOR_P (m2) - && !COMPLEX_MODE_P (m2) + && !complex_p && (m2 != DFmode || !TARGET_UPPER_REGS_DF) && (m2 != SFmode || !TARGET_UPPER_REGS_SF) && !(TARGET_E500_DOUBLE && msize == 8)) @@ -2769,17 +2802,31 @@ rs6000_setup_reg_addr_masks (void) } /* GPR and FPR registers can do REG+OFFSET addressing, except - possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form - addressing for scalars to altivec registers. */ + possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing + for 64-bit scalars and 32-bit SFmode to altivec registers. */ if ((addr_mask != 0) && !indexed_only_p && msize <= 8 && (rc == RELOAD_REG_GPR - || rc == RELOAD_REG_FPR - || (rc == RELOAD_REG_VMX - && TARGET_P9_DFORM - && (m2 == DFmode || m2 == SFmode)))) + || ((msize == 8 || m2 == SFmode) + && (rc == RELOAD_REG_FPR + || (rc == RELOAD_REG_VMX + && TARGET_P9_DFORM_SCALAR))))) addr_mask |= RELOAD_REG_OFFSET; + /* VSX registers can do REG+OFFSET addressing if ISA 3.0 + instructions are enabled. The offset for 128-bit VSX registers is + only 12 bits. While GPRs can handle the full offset range, VSX + registers can only handle the restricted range. */ + else if ((addr_mask != 0) && !indexed_only_p + && msize == 16 && TARGET_P9_DFORM_VECTOR + && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) + || (m2 == TImode && TARGET_VSX_TIMODE))) + { + addr_mask |= RELOAD_REG_OFFSET; + if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) + addr_mask |= RELOAD_REG_QUAD_OFFSET; + } + /* VMX registers can do (REG & -16) and ((REG+REG) & -16) addressing on 128-bit types. */ if (rc == RELOAD_REG_VMX && msize == 16 @@ -3102,7 +3149,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } /* Support for new D-form instructions. 
*/ - if (TARGET_P9_DFORM) + if (TARGET_P9_DFORM_SCALAR) rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS; /* Support for ISA 3.0 (power9) vectors. */ @@ -3621,11 +3668,16 @@ rs6000_builtin_mask_calculate (void) | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) + | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) + | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) + | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) + | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) - | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)); + | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0) + | ((TARGET_FLOAT128) ? RS6000_BTM_FLOAT128 : 0)); } /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered @@ -3784,22 +3836,7 @@ rs6000_option_override_internal (bool global_init_p) if (rs6000_tune_index >= 0) tune_index = rs6000_tune_index; else if (have_cpu) - { - /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */ - if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9) - rs6000_tune_index = tune_index = cpu_index; - else - { - size_t i; - tune_index = -1; - for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) - if (processor_target_table[i].processor == PROCESSOR_POWER8) - { - rs6000_tune_index = tune_index = i; - break; - } - } - } + rs6000_tune_index = tune_index = cpu_index; else { size_t i; @@ -3974,7 +4011,8 @@ rs6000_option_override_internal (bool global_init_p) /* For the newer switches (vsx, dfp, etc.) set some of the older options, unless the user explicitly used the -mno-