author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>   2016-02-10 17:20:51 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>   2016-02-10 17:20:51 +0000
commit     2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree       259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config/rs6000
parent     c8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
download   gcc-2d9d01985a7a7866916fafa19c5c296702e69714.tar.gz
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9
svn merge -r227001:227400 ^/trunk ;
there is some gengtype issue before svn r228000... }}
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r--  gcc/config/rs6000/altivec.h          |   2
-rw-r--r--  gcc/config/rs6000/predicates.md      |  31
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def |   5
-rw-r--r--  gcc/config/rs6000/rs6000-c.c         |  21
-rw-r--r--  gcc/config/rs6000/rs6000-cpus.def    |   2
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h    |   4
-rw-r--r--  gcc/config/rs6000/rs6000.c           | 343
-rw-r--r--  gcc/config/rs6000/rs6000.md          | 458
-rw-r--r--  gcc/config/rs6000/rs6000.opt         |   2
-rw-r--r--  gcc/config/rs6000/sysv4.h            |   6
-rw-r--r--  gcc/config/rs6000/sysv4le.h          |   4
-rw-r--r--  gcc/config/rs6000/vector.md          |  23
12 files changed, 757 insertions, 141 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 3ef6bc85ecd..1c00099c78d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -208,6 +208,8 @@
 #define vec_lvebx __builtin_vec_lvebx
 #define vec_lvehx __builtin_vec_lvehx
 #define vec_lvewx __builtin_vec_lvewx
+#define vec_pmsum_be __builtin_vec_vpmsum
+#define vec_shasigma_be __builtin_crypto_vshasigma
 /* Cell only intrinsics.  */
 #ifdef __PPU__
 #define vec_lvlx __builtin_vec_lvlx
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ae74796849d..3edb4774e75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -239,6 +239,25 @@
   return INT_REGNO_P (REGNO (op));
 })
 
+;; Like int_reg_operand, but don't return true for pseudo registers
+(define_predicate "int_reg_operand_not_pseudo"
+  (match_operand 0 "register_operand")
+{
+  if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+    return 0;
+
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return 0;
+
+  if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+    return 0;
+
+  return INT_REGNO_P (REGNO (op));
+})
+
 ;; Like int_reg_operand, but only return true for base registers
 (define_predicate "base_reg_operand"
   (match_operand 0 "int_reg_operand")
@@ -883,12 +902,12 @@
 (define_predicate "current_file_function_operand"
   (and (match_code "symbol_ref")
        (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
-                    && ((SYMBOL_REF_LOCAL_P (op)
-                         && ((DEFAULT_ABI != ABI_AIX
-                              && DEFAULT_ABI != ABI_ELFv2)
-                             || !SYMBOL_REF_EXTERNAL_P (op)))
-                        || (op == XEXP (DECL_RTL (current_function_decl),
-                                        0)))")))
+                    && (SYMBOL_REF_LOCAL_P (op)
+                        || op == XEXP (DECL_RTL (current_function_decl), 0))
+                    && !((DEFAULT_ABI == ABI_AIX
+                          || DEFAULT_ABI == ABI_ELFv2)
+                         && (SYMBOL_REF_EXTERNAL_P (op)
+                             || SYMBOL_REF_WEAK (op)))")))
 
 ;; Return 1 if this operand is a valid input for a move insn.
(define_predicate "input_operand" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 7beddf64d1b..85082ec0ee2 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum) BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus) +BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) @@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum") BU_P8V_OVERLOAD_2 (VRLD, "vrld") BU_P8V_OVERLOAD_2 (VSLD, "vsld") BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index d45bc93b10a..5fc2b53adfe 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, @@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 9fd565286f2..03764aef740 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -53,6 +53,7 @@ | OPTION_MASK_P8_VECTOR \ | 
OPTION_MASK_CRYPTO \ | OPTION_MASK_DIRECT_MOVE \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_HTM \ | OPTION_MASK_QUAD_MEMORY \ | OPTION_MASK_QUAD_MEMORY_ATOMIC \ @@ -78,6 +79,7 @@ | OPTION_MASK_DFP \ | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_DLMZB \ + | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_FPRND \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 7262a151438..7be529fab49 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -54,6 +54,7 @@ extern const char *output_vec_const_move (rtx *); extern const char *rs6000_output_move_128bit (rtx *); extern bool rs6000_move_128bit_ok_p (rtx []); extern bool rs6000_split_128bit_ok_p (rtx []); +extern void rs6000_expand_float128_convert (rtx, rtx, bool); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2a969782f26..8107bec8e6e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX) error ("-mfloat128-software requires VSX support"); + /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 + support. If we only have ISA 2.06 support, and the user did not specify + the switch, leave it set to -1 so the movmisalign patterns are enabled, + but we don't enable the full vectorization support */ + if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) + TARGET_ALLOW_MOVMISALIGN = 1; + + else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) + { + if (TARGET_ALLOW_MOVMISALIGN > 0) + error ("-mallow-movmisalign requires -mvsx"); + + TARGET_ALLOW_MOVMISALIGN = 0; + } + + /* Determine when unaligned vector accesses are permitted, and when + they are preferred over masked Altivec loads. Note that if + TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then + TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is + not true. */ + if (TARGET_EFFICIENT_UNALIGNED_VSX) + { + if (!TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mvsx"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + + else if (!TARGET_ALLOW_MOVMISALIGN) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) + error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); + + rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; + } + } + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p) } } - /* Determine when unaligned vector accesses are permitted, and when - they are preferred over masked Altivec loads. Note that if - TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then - TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is - not true. 
*/ - if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) { - if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8 - && TARGET_ALLOW_MOVMISALIGN != 0) - TARGET_EFFICIENT_UNALIGNED_VSX = 1; - else - TARGET_EFFICIENT_UNALIGNED_VSX = 0; - } - - if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8) - TARGET_ALLOW_MOVMISALIGN = 1; - /* Set the builtin mask of the various options used that could affect which builtins were used. In the past we used target_flags, but we've run out of bits, and some options like SPE and PAIRED are no longer in @@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) during expand. */ gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); - /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, + /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, V1TImode). */ if (mode == TImode || mode == V1TImode) { @@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from, { unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to]; unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from]; + bool to_float128_vector_p = FLOAT128_VECTOR_P (to); + bool from_float128_vector_p = FLOAT128_VECTOR_P (from); /* Don't allow 64-bit types to overlap with 128-bit types that take a single register under VSX because the scalar part of the register @@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from, IEEE floating point can't overlap, and neither can small values. */ - if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode)) + if (to_float128_vector_p && from_float128_vector_p) + return false; + + else if (to_float128_vector_p || from_float128_vector_p) return true; /* TDmode in floating-mode registers must always go into a register @@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from, if (TARGET_E500_DOUBLE && ((((to) == DFmode) + ((from) == DFmode)) == 1 || (((to) == TFmode) + ((from) == TFmode)) == 1 + || (((to) == IFmode) + ((from) == IFmode)) == 1 + || (((to) == KFmode) + ((from) == KFmode)) == 1 || (((to) == DDmode) + ((from) == DDmode)) == 1 || (((to) == TDmode) + ((from) == TDmode)) == 1 || (((to) == DImode) + ((from) == DImode)) == 1)) @@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[]) return output_vec_const_move (operands); } - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, "\n===== Bad 128 bit move:\n"); - debug_rtx (gen_rtx_SET (dest, src)); - } - - gcc_unreachable (); + fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); } /* Validate a 128-bit move. */ @@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfeq_gpr (compare_result, op0, op1) : gen_cmptfeq_gpr (compare_result, op0, op1); @@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfgt_gpr (compare_result, op0, op1) : gen_cmptfgt_gpr (compare_result, op0, op1); @@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? 
gen_tsttflt_gpr (compare_result, op0, op1) : gen_cmptflt_gpr (compare_result, op0, op1); @@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) break; case TFmode: + case IFmode: + case KFmode: cmp = (flag_finite_math_only && !flag_trapping_math) ? gen_tsttfeq_gpr (compare_result2, op0, op1) : gen_cmptfeq_gpr (compare_result2, op0, op1); @@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) emit_insn (cmp); } + + /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2) + returns 0..15 that is laid out the same way as the PowerPC CR register + would for a normal floating point comparison. */ + else if (FLOAT128_IEEE_P (mode)) + { + rtx and_reg = gen_reg_rtx (SImode); + rtx dest = gen_reg_rtx (SImode); + rtx libfunc = optab_libfunc (cmp_optab, mode); + HOST_WIDE_INT mask_value = 0; + + /* Values that __cmpkf2 returns. */ +#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */ +#define PPC_CMP_EQUAL 0x2 /* a == b. */ +#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */ +#define PPC_CMP_LESS_THEN 0x8 /* a < b. */ + + switch (code) + { + case EQ: + mask_value = PPC_CMP_EQUAL; + code = NE; + break; + + case NE: + mask_value = PPC_CMP_EQUAL; + code = EQ; + break; + + case GT: + mask_value = PPC_CMP_GREATER_THEN; + code = NE; + break; + + case GE: + mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL; + code = NE; + break; + + case LT: + mask_value = PPC_CMP_LESS_THEN; + code = NE; + break; + + case LE: + mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL; + code = NE; + break; + + case UNLE: + mask_value = PPC_CMP_GREATER_THEN; + code = EQ; + break; + + case UNLT: + mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL; + code = EQ; + break; + + case UNGE: + mask_value = PPC_CMP_LESS_THEN; + code = EQ; + break; + + case UNGT: + mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL; + code = EQ; + break; + + case UNEQ: + mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED; + code = NE; + + case LTGT: + mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED; + code = EQ; + break; + + case UNORDERED: + mask_value = PPC_CMP_UNORDERED; + code = NE; + break; + + case ORDERED: + mask_value = PPC_CMP_UNORDERED; + code = EQ; + break; + + default: + gcc_unreachable (); + } + + gcc_assert (mask_value != 0); + and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2, + op0, mode, op1, mode); + + emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value))); + compare_result = gen_reg_rtx (CCmode); + comp_mode = CCmode; + + emit_insn (gen_rtx_SET (compare_result, + gen_rtx_COMPARE (comp_mode, dest, const0_rtx))); + } + else { /* Generate XLC-compatible TFmode compare as PARALLEL with extra CLOBBERs to match cmptf_internal2 pattern. */ if (comp_mode == CCFPmode && TARGET_XL_COMPAT - && GET_MODE (op0) == TFmode - && !TARGET_IEEEQUAD - && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128) + && FLOAT128_IBM_P (GET_MODE (op0)) + && TARGET_HARD_FLOAT && TARGET_FPRS) emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (10, gen_rtx_SET (compare_result, @@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) /* Some kinds of FP comparisons need an OR operation; under flag_finite_math_only we don't bother. */ if (FLOAT_MODE_P (mode) + && !FLOAT128_IEEE_P (mode) && !flag_finite_math_only && !(TARGET_HARD_FLOAT && !TARGET_FPRS) && (code == LE || code == GE @@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) } +/* Expand floating point conversion to/from __float128 and __ibm128. 
*/ + +void +rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) +{ + machine_mode dest_mode = GET_MODE (dest); + machine_mode src_mode = GET_MODE (src); + convert_optab cvt = unknown_optab; + rtx libfunc = NULL_RTX; + rtx dest2; + + if (dest_mode == src_mode) + gcc_unreachable (); + + if (FLOAT128_IEEE_P (dest_mode)) + { + if (src_mode == SFmode + || src_mode == DFmode + || FLOAT128_IBM_P (src_mode)) + cvt = sext_optab; + + else if (GET_MODE_CLASS (src_mode) == MODE_INT) + cvt = (unsigned_p) ? ufloat_optab : sfloat_optab; + + else if (FLOAT128_IEEE_P (src_mode)) + emit_move_insn (dest, gen_lowpart (dest_mode, src)); + + else + gcc_unreachable (); + } + + else if (FLOAT128_IEEE_P (src_mode)) + { + if (dest_mode == SFmode + || dest_mode == DFmode + || FLOAT128_IBM_P (dest_mode)) + cvt = trunc_optab; + + else if (GET_MODE_CLASS (dest_mode) == MODE_INT) + cvt = (unsigned_p) ? ufix_optab : sfix_optab; + + else + gcc_unreachable (); + } + + else + gcc_unreachable (); + + gcc_assert (cvt != unknown_optab); + libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode); + gcc_assert (libfunc != NULL_RTX); + + dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src, + src_mode); + + gcc_assert (dest != NULL_RTX); + if (!rtx_equal_p (dest, dest2)) + emit_move_insn (dest, dest2); + + return; +} + /* Emit the RTL for an sISEL pattern. */ void @@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp) || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && decl && !DECL_EXTERNAL (decl) + && !DECL_WEAK (decl) && (*targetm.binds_local_p) (decl)) || (DEFAULT_ABI == ABI_V4 && (!TARGET_SECURE_PLT @@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "crypto", OPTION_MASK_CRYPTO, false, true }, { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, { "dlmzb", OPTION_MASK_DLMZB, false, true }, + { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, + false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, { "hard-dfp", OPTION_MASK_DFP, false, true }, { "htm", OPTION_MASK_HTM, false, true }, @@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base /* A nonzero value indicates what kind of special handling for this insn is required if doublewords are swapped. Undefined if is_swappable is not set. */ - unsigned int special_handling : 3; + unsigned int special_handling : 4; /* Set if the web represented by this entry cannot be optimized. */ unsigned int web_not_optimizable : 1; /* Set if this insn should be deleted. */ @@ -34800,7 +35001,9 @@ enum special_handling_values { SH_NOSWAP_LD, SH_NOSWAP_ST, SH_EXTRACT, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_CONCAT }; /* Union INSN with all insns containing definitions that reach USE. @@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special) *special = SH_EXTRACT; return 1; } + /* An XXPERMDI is ok if we adjust the lanes. Note that if the + XXPERMDI is a swap operation, it will be identified by + insn_is_swap_p and therefore we won't get here. 
*/ + else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT + && (GET_MODE (XEXP (op, 0)) == V4DFmode + || GET_MODE (XEXP (op, 0)) == V4DImode) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 2 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT + && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT) + { + *special = SH_XXPERMDI; + return 1; + } else return 0; @@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, return 1; } + /* A concatenation of two doublewords is ok if we reverse the + order of the inputs. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_CONCAT + && (GET_MODE (SET_SRC (body)) == V2DFmode + || GET_MODE (SET_SRC (body)) == V2DImode)) + { + *special = SH_CONCAT; + return 1; + } + /* Otherwise check the operands for vector lane violations. */ return rtx_is_swappable_p (body, special); } @@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn) fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); } +/* Given OP that contains an XXPERMDI operation (that is not a doubleword + swap), reverse the order of the source operands and adjust the indices + of the source lanes to account for doubleword reversal. */ +static void +adjust_xxpermdi (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx select = XEXP (set, 1); + rtx concat = XEXP (select, 0); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + rtx parallel = XEXP (select, 1); + int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); + int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); + int new_lane0 = 3 - lane1; + int new_lane1 = 3 - lane0; + XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); + XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a VEC_CONCAT operation of two doublewords, + reverse the order of those inputs. */ +static void +adjust_concat (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx concat = XEXP (set, 1); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); +} + /* The insn described by INSN_ENTRY[I] can be swapped, but only with special handling. Take care of that here. */ static void @@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) /* Change the lane on a direct-splat operation. */ adjust_splat (insn); break; + case SH_XXPERMDI: + /* Change the lanes on an XXPERMDI operation. */ + adjust_xxpermdi (insn); + break; + case SH_CONCAT: + /* Reverse the order of a concatenation operation. 
*/ + adjust_concat (insn); + break; } } @@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("special:extract ", dump_file); else if (insn_entry[i].special_handling == SH_SPLAT) fputs ("special:splat ", dump_file); + else if (insn_entry[i].special_handling == SH_XXPERMDI) + fputs ("special:xxpermdi ", dump_file); + else if (insn_entry[i].special_handling == SH_CONCAT) + fputs ("special:concat ", dump_file); } if (insn_entry[i].web_not_optimizable) fputs ("unoptimizable ", dump_file); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 527ad985423..cfdb286a2cb 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -348,6 +348,8 @@ && TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128") + (IF "TARGET_FLOAT128") + (KF "TARGET_FLOAT128") (DD "TARGET_DFP") (TD "TARGET_DFP")]) @@ -365,9 +367,14 @@ (define_mode_iterator FMOVE32 [SF SD]) (define_mode_iterator FMOVE64 [DF DD]) (define_mode_iterator FMOVE64X [DI DF DD]) -(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") +(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128") + (IF "TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)") + (IF "FLOAT128_2REG_P (IFmode)") + (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) + ; Iterators for 128 bit types for direct move (define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") (V16QI "") @@ -376,7 +383,13 @@ (V4SF "") (V2DI "") (V2DF "") - (V1TI "")]) + (V1TI "") + (KF "") + (TF "") + (IF "")]) + +; Iterator for 128-bit VSX types for pack/unpack +(define_mode_iterator FMOVE128_VSX [V1TI KF]) ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") @@ -432,6 +445,25 @@ ; Iterator for just SF/DF (define_mode_iterator SFDF [SF DF]) +; Iterator for float128 floating conversions +(define_mode_iterator FLOAT128_SFDFTF [ + (SF "TARGET_FLOAT128") + (DF "TARGET_FLOAT128") + (TF "FLOAT128_IBM_P (TFmode)") + (IF "TARGET_FLOAT128")]) + +; Iterator for special 128-bit floating point. This is for non-default +; conversions, so TFmode is not used here. +(define_mode_iterator IFKF [IF KF]) + +; Iterator for 128-bit floating point that uses the IBM double-double format +(define_mode_iterator IBM128 [IF TF]) + +; Iterator for 128-bit floating point +(define_mode_iterator TFIFKF [(KF "TARGET_FLOAT128") + (IF "TARGET_FLOAT128") + (TF "TARGET_LONG_DOUBLE_128")]) + ; SF/DF suffix for traditional floating instructions (define_mode_attr Ftrad [(SF "s") (DF "")]) @@ -596,7 +628,7 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI]) + SF SD SI DF DD DI TI PTI KF IF TF]) ;; Start with fixed-point load and store insns. 
Here we put only the more @@ -3037,15 +3069,15 @@ (set_attr "dot" "yes") (set_attr "length" "4,8")]) -(define_insn_and_split "*and<mode>3_imm_dot_shifted" - [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y") +(define_insn "*and<mode>3_imm_dot_shifted" + [(set (match_operand:CC 3 "cc_reg_operand" "=x") (compare:CC (and:GPR - (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r") - (match_operand:SI 4 "const_int_operand" "n,n")) - (match_operand:GPR 2 "const_int_operand" "n,n")) + (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r") + (match_operand:SI 4 "const_int_operand" "n")) + (match_operand:GPR 2 "const_int_operand" "n")) (const_int 0))) - (clobber (match_scratch:GPR 0 "=r,r"))] + (clobber (match_scratch:GPR 0 "=r"))] "logical_const_operand (GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4])), DImode) @@ -3054,23 +3086,10 @@ && rs6000_gen_cell_microcode" { operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4])); - if (which_alternative == 0) - return "andi%e2. %0,%1,%u2"; - else - return "#"; + return "andi%e2. %0,%1,%u2"; } - "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)" - [(set (match_dup 0) - (and:GPR (lshiftrt:GPR (match_dup 1) - (match_dup 4)) - (match_dup 2))) - (set (match_dup 3) - (compare:CC (match_dup 0) - (const_int 0)))] - "" [(set_attr "type" "logical") - (set_attr "dot" "yes") - (set_attr "length" "4,8")]) + (set_attr "dot" "yes")]) (define_insn "and<mode>3_mask" @@ -3664,10 +3683,10 @@ ; an insert instruction, in many cases. (define_insn_and_split "*ior<mode>_mask" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") - (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") - (match_operand:GPR 2 "const_int_operand" "n")))] - "can_create_pseudo_p () - && !logical_const_operand (operands[2], <MODE>mode) + (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0") + (match_operand:GPR 2 "const_int_operand" "n"))) + (clobber (match_scratch:GPR 3 "=r"))] + "!logical_const_operand (operands[2], <MODE>mode) && rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)" "#" "&& 1" @@ -3682,7 +3701,8 @@ { int nb, ne; rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode); - operands[3] = gen_reg_rtx (<MODE>mode); + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<MODE>mode); operands[4] = GEN_INT (ne); operands[5] = GEN_INT (~UINTVAL (operands[2])); } @@ -4216,19 +4236,18 @@ ;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in ;; builtins.c and optabs.c that are not correct for IBM long double ;; when little-endian. -(define_expand "signbittf2" +(define_expand "signbit<mode>2" [(set (match_dup 2) - (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" ""))) + (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" ""))) (set (match_dup 3) (subreg:DI (match_dup 2) 0)) (set (match_dup 4) (match_dup 5)) (set (match_operand:SI 0 "gpc_reg_operand" "") (match_dup 6))] - "!TARGET_IEEEQUAD + "FLOAT128_IBM_P (<MODE>mode) && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" + && (TARGET_FPRS || TARGET_E500_DOUBLE)" { operands[2] = gen_reg_rtx (DFmode); operands[3] = gen_reg_rtx (DImode); @@ -6402,9 +6421,10 @@ ;; problematical. Don't allow direct move for this case. 
(define_insn_and_split "*mov<mode>_64bit_dm" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))] + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 + && FLOAT128_2REG_P (<MODE>mode) && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -6427,9 +6447,12 @@ [(set_attr "length" "8,8,8,8,12,12,8")]) (define_insn_and_split "*mov<mode>_32bit" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r"))] + [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") + (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64 + && (FLOAT128_2REG_P (<MODE>mode) + || int_reg_operand_not_pseudo (operands[0], <MODE>mode) + || int_reg_operand_not_pseudo (operands[1], <MODE>mode)) && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" "#" @@ -6453,12 +6476,12 @@ (define_expand "extenddftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") (float_extend:TF (match_operand:DF 1 "input_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_extenddftf2 (operands[0], operands[1])); else emit_insn (gen_extenddftf2_fprs (operands[0], operands[1])); @@ -6507,25 +6530,34 @@ (define_expand "extendsftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") (float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - rtx tmp = gen_reg_rtx (DFmode); - emit_insn (gen_extendsfdf2 (tmp, operands[1])); - emit_insn (gen_extenddftf2 (operands[0], tmp)); + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else + { + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + } DONE; }) (define_expand "trunctfdf2" [(set (match_operand:DF 0 "gpc_reg_operand" "") (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" - "") +{ + if (TARGET_IEEEQUAD) + { + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; + } +}) (define_insn_and_split "trunctfdf2_internal1" [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d") @@ -6556,12 +6588,13 @@ (define_expand "trunctfsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_trunctfsf2 (operands[0], 
operands[1])); else emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1])); @@ -6612,10 +6645,12 @@ (define_expand "fix_trunctfsi2" [(set (match_operand:SI 0 "gpc_reg_operand" "") (fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT + "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128" { - if (TARGET_E500_DOUBLE) + if (TARGET_IEEEQUAD) + rs6000_expand_float128_convert (operands[0], operands[1], false); + else if (TARGET_E500_DOUBLE) emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1])); else emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1])); @@ -6663,20 +6698,73 @@ DONE; }) -(define_expand "negtf2" - [(set (match_operand:TF 0 "gpc_reg_operand" "") - (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" - "") +(define_expand "fix_trunctfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fixuns_trunctf<mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (unsigned_fix:SDI (match_operand:TF 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "floatditf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float:TF (match_operand:DI 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "floatuns<mode>tf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (unsigned_float:TF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "neg<mode>2" + [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "") + (neg:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" + " +{ + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128) + emit_insn (gen_ieee_128bit_vsx_neg<mode>2 (operands[0], operands[1])); + else + { + rtx libfunc = optab_libfunc (neg_optab, <MODE>mode); + rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST, + <MODE>mode, 1, + operands[1], <MODE>mode); + + if (target && !rtx_equal_p (target, operands[0])) + emit_move_insn (operands[0], target); + } + DONE; + } +}") (define_insn "negtf2_internal" [(set (match_operand:TF 0 "gpc_reg_operand" "=d") (neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128" + "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)" "* { if (REGNO (operands[0]) == REGNO (operands[1]) + 1) @@ -6687,16 +6775,29 @@ [(set_attr "type" "fp") (set_attr "length" "8")]) -(define_expand "abstf2" - [(set (match_operand:TF 0 "gpc_reg_operand" "") - (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))] - "!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE) - && TARGET_LONG_DOUBLE_128" +(define_expand "abs<mode>2" + [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "") + (abs:TFIFKF (match_operand:TFIFKF 1 
"gpc_reg_operand" "")))] + "FLOAT128_IEEE_P (<MODE>mode) + || (FLOAT128_IBM_P (<MODE>mode) + && TARGET_HARD_FLOAT + && (TARGET_FPRS || TARGET_E500_DOUBLE))" " { - rtx label = gen_label_rtx (); + rtx label; + + if (FLOAT128_IEEE_P (<MODE>mode)) + { + if (TARGET_FLOAT128) + { + emit_insn (gen_ieee_128bit_vsx_abs<mode>2 (operands[0], operands[1])); + DONE; + } + else + FAIL; + } + + label = gen_label_rtx (); if (TARGET_E500_DOUBLE) { if (flag_finite_math_only && !flag_trapping_math) @@ -6732,6 +6833,184 @@ operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word); operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word); }") + + +;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector +;; register + +(define_expand "ieee_128bit_negative_zero" + [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))] + "TARGET_FLOAT128" +{ + rtvec v = rtvec_alloc (16); + int i, high; + + for (i = 0; i < 16; i++) + RTVEC_ELT (v, i) = const0_rtx; + + high = (BYTES_BIG_ENDIAN) ? 0 : 15; + RTVEC_ELT (v, high) = GEN_INT (0x80); + + rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v)); + DONE; +}) + +;; IEEE 128-bit negate + +;; We have 2 insns here for negate and absolute value. The first uses +;; match_scratch so that phases like combine can recognize neg/abs as generic +;; insns, and second insn after the first split pass loads up the bit to +;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of +;; neg/abs to create the constant just once. + +(define_insn_and_split "ieee_128bit_vsx_neg<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (neg:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_neg<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlxor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; IEEE 128-bit absolute value +(define_insn_and_split "ieee_128bit_vsx_abs<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (abs:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_abs<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa"))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlandc %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; IEEE 128-bit negative absolute value +(define_insn_and_split 
"*ieee_128bit_vsx_nabs<mode>2" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF + (abs:TFIFKF + (match_operand:TFIFKF 1 "register_operand" "wa")))) + (clobber (match_scratch:V16QI 2 "=v"))] + "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (abs:TFIFKF (match_dup 1))) + (use (match_dup 2))])] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (V16QImode); + + operands[3] = gen_reg_rtx (V16QImode); + emit_insn (gen_ieee_128bit_negative_zero (operands[2])); +} + [(set_attr "length" "8") + (set_attr "type" "vecsimple")]) + +(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal" + [(set (match_operand:TFIFKF 0 "register_operand" "=wa") + (neg:TFIFKF + (abs:TFIFKF + (match_operand:TFIFKF 1 "register_operand" "wa")))) + (use (match_operand:V16QI 2 "register_operand" "=v"))] + "TARGET_FLOAT128" + "xxlor %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + +;; Float128 conversion functions. These expand to library function calls. + +(define_expand "extend<FLOAT128_SFDFTF:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (float_extend:IFKF + (match_operand:FLOAT128_SFDFTF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "trunc<IFKF:mode><FLOAT128_SFDFTF:mode>2" + [(set (match_operand:FLOAT128_SFDFTF 0 "nonimmediate_operand" "") + (float_truncate:FLOAT128_SFDFTF + (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fix_trunc<IFKF:mode><SDI:mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "fixuns_trunc<IFKF:mode><SDI:mode>2" + [(set (match_operand:SDI 0 "nonimmediate_operand" "") + (unsigned_fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + +(define_expand "float<SDI:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (float:KF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], false); + DONE; +}) + +(define_expand "floatuns<SDI:mode><IFKF:mode>2" + [(set (match_operand:IFKF 0 "nonimmediate_operand" "") + (unsigned_float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))] + "TARGET_FLOAT128" +{ + rs6000_expand_float128_convert (operands[0], operands[1], true); + DONE; +}) + ;; Reload helper functions used by rs6000_secondary_reload. 
The patterns all ;; must have 3 arguments, and scratch register constraint must be a single @@ -9516,7 +9795,7 @@ { int i; - emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx)); + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -12134,7 +12413,10 @@ ;; Pack/unpack 128-bit floating point types that take 2 scalar registers ; Type of the 64-bit part when packing/unpacking 128-bit floating point types -(define_mode_attr FP128_64 [(TF "DF") (TD "DI")]) +(define_mode_attr FP128_64 [(TF "DF") + (IF "DF") + (TD "DI") + (KF "DI")]) (define_expand "unpack<mode>" [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "") @@ -12142,7 +12424,7 @@ [(match_operand:FMOVE128 1 "register_operand" "") (match_operand:QI 2 "const_0_to_1_operand" "")] UNSPEC_UNPACK_128BIT))] - "" + "FLOAT128_2REG_P (<MODE>mode)" "") (define_insn_and_split "unpack<mode>_dm" @@ -12151,7 +12433,7 @@ [(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r") (match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")] UNSPEC_UNPACK_128BIT))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 3))] @@ -12175,7 +12457,7 @@ [(match_operand:FMOVE128 1 "register_operand" "d,d") (match_operand:QI 2 "const_0_to_1_operand" "i,i")] UNSPEC_UNPACK_128BIT))] - "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE" + "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)" "#" "&& reload_completed" [(set (match_dup 0) (match_dup 3))] @@ -12199,7 +12481,7 @@ [(match_operand:<FP128_64> 1 "register_operand" "0,d") (match_operand:<FP128_64> 2 "register_operand" "d,d")] UNSPEC_PACK_128BIT))] - "" + "FLOAT128_2REG_P (<MODE>mode)" "@ fmr %L0,%2 #" @@ -12219,12 +12501,12 @@ [(set_attr "type" "fp,fp") (set_attr "length" "4,8")]) -(define_insn "unpackv1ti" +(define_insn "unpack<mode>" [(set (match_operand:DI 0 "register_operand" "=d,d") - (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa") + (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa") (match_operand:QI 2 "const_0_to_1_operand" "O,i")] UNSPEC_UNPACK_128BIT))] - "TARGET_VSX" + "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" { if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0) return ASM_COMMENT_START " xxpermdi to same register"; @@ -12232,19 +12514,17 @@ operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 
0 : 3); return "xxpermdi %x0,%x1,%x1,%3"; } - [(set_attr "type" "vecperm") - (set_attr "length" "4")]) + [(set_attr "type" "vecperm")]) -(define_insn "packv1ti" - [(set (match_operand:V1TI 0 "register_operand" "=wa") - (unspec:V1TI +(define_insn "pack<mode>" + [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa") + (unspec:FMOVE128_VSX [(match_operand:DI 1 "register_operand" "d") (match_operand:DI 2 "register_operand" "d")] UNSPEC_PACK_128BIT))] "TARGET_VSX" "xxpermdi %x0,%x1,%x2,0" - [(set_attr "type" "vecperm") - (set_attr "length" "4")]) + [(set_attr "type" "vecperm")]) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 18ea27a3d90..6d11ff7dfdb 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -212,7 +212,7 @@ Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save ; Allow/disallow the movmisalign in DF/DI vectors mefficient-unaligned-vector -Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1) +Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags) ; Consider unaligned VSX accesses to be efficient/inefficient mallow-df-permute diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h index 24618e309f1..f48af43e7c5 100644 --- a/gcc/config/rs6000/sysv4.h +++ b/gcc/config/rs6000/sysv4.h @@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) %{R*} \ %(link_shlib) \ %{!T*: %(link_start) } \ -%(link_target) \ %(link_os)" /* Shared libraries are not default. */ @@ -584,10 +583,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) %{shared:-G -dy -z text } \ %{symbolic:-Bsymbolic -G -dy -z text }" -/* Override the default target of the linker. */ -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT("", " --oformat elf32-powerpcle", "") - /* Any specific OS flags. */ #define LINK_OS_SPEC "\ %{mads : %(link_os_ads) ; \ @@ -873,7 +868,6 @@ ncrtn.o%s" { "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \ { "endfile_default", ENDFILE_DEFAULT_SPEC }, \ { "link_shlib", LINK_SHLIB_SPEC }, \ - { "link_target", LINK_TARGET_SPEC }, \ { "link_start", LINK_START_SPEC }, \ { "link_start_ads", LINK_START_ADS_SPEC }, \ { "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \ diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h index 7b1d6a1b4de..66ee7cadfe4 100644 --- a/gcc/config/rs6000/sysv4le.h +++ b/gcc/config/rs6000/sysv4le.h @@ -25,10 +25,6 @@ #undef DEFAULT_ASM_ENDIAN #define DEFAULT_ASM_ENDIAN " -mlittle" -#undef LINK_TARGET_SPEC -#define LINK_TARGET_SPEC \ - ENDIAN_SELECT(" --oformat elf32-powerpc", "", "") - #undef MULTILIB_DEFAULTS #define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" } diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 4a62fbbbdd4..8821dec5989 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -977,6 +977,8 @@ ;; General shift amounts can be supported using vsro + vsr. We're ;; not expecting to see these yet (the vectorizer currently ;; generates only shifts by a whole number of vector elements). +;; Note that the vec_shr operation is actually defined as +;; 'shift toward element 0' so is a shr for LE and shl for BE. 
(define_expand "vec_shr_<mode>" [(match_operand:VEC_L 0 "vlogical_operand" "") (match_operand:VEC_L 1 "vlogical_operand" "") @@ -987,6 +989,7 @@ rtx bitshift = operands[2]; rtx shift; rtx insn; + rtx zero_reg, op1, op2; HOST_WIDE_INT bitshift_val; HOST_WIDE_INT byteshift_val; @@ -996,19 +999,29 @@ if (bitshift_val & 0x7) FAIL; byteshift_val = (bitshift_val >> 3); + zero_reg = gen_reg_rtx (<MODE>mode); + emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode)); if (!BYTES_BIG_ENDIAN) - byteshift_val = 16 - byteshift_val; + { + byteshift_val = 16 - byteshift_val; + op1 = zero_reg; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = zero_reg; + } + if (TARGET_VSX && (byteshift_val & 0x3) == 0) { shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2); - insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift); } else { shift = gen_rtx_CONST_INT (QImode, byteshift_val); - insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1], - shift); + insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift); } emit_insn (insn); |